# load necessary packages and data
library(tidyr)
library(dplyr)
library(readr)
library(plotly)
library(ggplot2)
library(jgcricolors)
library(Polychrome)
setwd("C:/Users/spei632/Documents/GCAM_industry/food_processing")
fig_dir <- "C:/Users/spei632/Documents/GCAM_industry/food_processing/initial_exploration/figures_energy_calorie_coef_calc"
if(!dir.exists(fig_dir)) {
dir.create(fig_dir)
}
# load data
# energy balances data from IEA, intermediate processed version from gcamdata
IEA_en_bal <- read_csv("initial_exploration/data/L100.IEA_en_bal_ctry_hist.csv")
# FAO population and GDP data
fao_pop <- read_csv("initial_exploration/data/FAOSTAT_data_pop_12-29-2022.csv")
fao_gdp <- read_csv("initial_exploration/data/FAOSTAT_data_gdp_12-29-2022.csv")
# FAO food data
macronutrientrate_2010_2019_mean <- read_csv("initial_exploration/data/GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean.csv", skip = 8)
fao_sua_2010_2019 <- read_csv("initial_exploration/data/GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019.csv", skip = 8)
fao_fbsh_1973_2009 <- read_csv("initial_exploration/data/GCAMDATA_FAOSTAT_FBSH_CB_173Regs_118Items_1973to2009.csv", skip = 8)
# gcamdata calorie consumption data
L101.CropMeat_Food_Pcal_R_C_Y <- read_csv("initial_exploration/data/L101.CropMeat_Food_Pcal_R_C_Y.csv")
# gcamdata GDP data
L102.pcgdp_thous90USD_Scen_R_Y <- read_csv("initial_exploration/data/L102.pcgdp_thous90USD_Scen_R_Y.csv")
L102.gdp_mil90usd_Scen_R_Y <- read_csv("initial_exploration/data/L102.gdp_mil90usd_Scen_R_Y.csv")
# resulting data from implemented infilling
L1328.EJ_R_food_F_Yh_est_to_infill_adders <- read_csv("initial_exploration/data/L1328.EJ_R_food_F_Yh_est_to_infill_adders_5-19-23.csv")
L1328.in_EJ_R_food_F_Yh_recal <- read_csv("initial_exploration/data/L1328.in_EJ_R_food_F_Yh_recal_5-19-23.csv")
L1328.in_EJ_R_indenergy_F_Yh_tmp_2 <- read_csv("initial_exploration/data/L1328.in_EJ_R_indenergy_F_Yh_tmp_2_5-19-23.csv")
L1328.out_Pcal_R_food_Yh <- read_csv("initial_exploration/data/L1328.out_Pcal_R_food_Yh_5-19-23.csv")
# set constants
CONV_KTOE_EJ <- 0.00004186
hist_years <- c(1971:2015)
gcam_hist_years <- c(1990, 2005, 2010, 2015)
conv_P_k <- 10^12
# mappings
IEA_product_fuel <- read_csv("mappings/IEA_product_fuel.csv", skip = 7)
enduse_fuel_aggregation <- read_csv("mappings/enduse_fuel_aggregation.csv", skip = 5)
IEA_ctry <- read_csv("mappings/iea_ctry.csv", skip = 5)
iso_GCAM_regID <- read_csv("mappings/iso_GCAM_regID.csv", skip = 6)
GCAM_region_names <- read_csv("mappings/GCAM_region_names.csv", skip = 6)
AGLU_ctry <- read_csv("mappings/AGLU_ctry.csv", skip = 7)
Mapping_SUA_PrimaryEquivalent <- read_csv("initial_exploration/data/Mapping_SUA_PrimaryEquivalent.csv", skip = 21)
Mapping_item_FBS_GCAM <- read_csv("initial_exploration/data/Mapping_item_FBS_GCAM.csv", skip = 7)
SUA_item_code_map <- read_csv("initial_exploration/data/SUA_item_code_map.csv", skip = 9)
Area_Region_Map <- read_csv("initial_exploration/data/Area_Region_Map.csv")
GCAM_commodity_type_mapping <- read_csv("mappings/GCAM_commodity_type_mapping.csv")
# colors and palettes
data(palette36)
palette36 <- unname(palette36)
pal_sel <- jgcricol()$pal_all
# assumptions/criteria for filtering data
min_frac_foodpro <- 0.01
min_year <- 1990
max_frac_inonspec <- 0.5
override_frac_foodpro <- 0.1
max_pval <- 0.05
Calculate GDP per capita from the GDP and population data from the FAO.
# get from the FAO data (also include population)
fao_gdp_per_cap_hist <- fao_gdp %>%
filter(Element == "Value US$ per capita, 2015 prices" & Year <= 2015) %>%
left_join(fao_pop %>% mutate(pop = Value * 1000) %>% dplyr::select(Area, Year, pop))
# prep FAO data
fao_gdp_per_cap_hist_relabel <- fao_gdp_per_cap_hist %>%
# exclude USSR values in 1990, because these are covered by individual regions
filter(!(Area == "USSR" & Year == 1990)) %>%
# also exclude the "China" GDP since that includes Hong Kong and Macao as well, which are also broken out; there is another one that is "China, mainland" which we keep
filter(Area != "China") %>%
mutate(GDP_pcap_thous2015USD_FAO = Value / 1000) %>%
dplyr::select(area = Area, year = Year, GDP_pcap_thous2015USD_FAO, pop) %>%
# edit a few areas and join to isos and GCAM region IDs
mutate(area = case_when(area == "Côte d'Ivoire" ~ "Cote dIvoire",
area == "Curaçao" ~ "Curacao",
area == "Democratic People's Republic of Korea" ~ "Democratic Peoples Republic of Korea",
area == "Lao People's Democratic Republic" ~ "Lao Peoples Democratic Republic",
area == "Sint Maarten (Dutch part)" ~ "Sint Maarten (Dutch Part)",
area == "Türkiye" ~ "Turkiye",
grepl("Yemen", area) ~ "Yemen",
TRUE ~ area)) %>%
group_by(area, year) %>%
summarize(GDP_pcap_thous2015USD_FAO = weighted.mean(GDP_pcap_thous2015USD_FAO, pop),
pop = sum(pop)) %>%
ungroup() %>%
# get total GDP
mutate(GDP_mil2015USD_FAO = GDP_pcap_thous2015USD_FAO * pop / 1000) %>%
left_join(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>%
left_join(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso")
GDP_per_cap_hist_calc_FAO <- fao_gdp_per_cap_hist_relabel %>%
rename(area_FAO = area)
Select just the SSP2 data from the gcamdata GDP data.
GDP_pcap_gcamdata_sel <- L102.pcgdp_thous90USD_Scen_R_Y %>% filter(scenario == "gSSP2") %>%
rename(GDP_pcap_thous90USD = value)
GDP_gcamdata_sel <- L102.gdp_mil90usd_Scen_R_Y %>% filter(scenario == "gSSP2") %>%
rename(GDP_mil90USD = value)
First get IEA data for industry (non-specified and specific industries).
# industry flows
flows_sel <- c("MINING", "CONSTRUC", "IRONSTL", "CHEMICAL", "NONFERR", "NONMET", "TRANSEQ", "MACHINE", "FOODPRO", "PAPERPRO", "WOODPRO", "TEXTILES", "INONSPEC")
# get industry data and map correctly
IEA_ind_sectors_region_fuel <- IEA_en_bal %>%
filter(FLOW %in% flows_sel) %>%
dplyr::select(iso, FLOW, PRODUCT, as.character(hist_years)) %>%
left_join(select(iso_GCAM_regID, iso, GCAM_region_ID, country_name), by = "iso") %>%
left_join(select(IEA_product_fuel, fuel, PRODUCT = product), by = "PRODUCT") %>%
left_join(enduse_fuel_aggregation %>% dplyr::select(fuel, industry)) %>%
# remap biomass_tradbio to biomass
mutate(industry = if_else(fuel == "biomass_tradbio", "biomass", industry)) %>%
pivot_longer(as.character(hist_years), names_to = "year", values_to = "value") %>%
rename(fuel_orig = fuel, fuel = industry) %>%
filter(!is.na(fuel)) %>%
group_by(iso, country_name, GCAM_region_ID, year, fuel, FLOW) %>%
# summarize and convert to EJ
summarize(value = sum(value, na.rm = TRUE) * CONV_KTOE_EJ) %>%
ungroup() %>%
mutate(year = as.numeric(year)) %>%
# get fraction of each sector to the total fuel use
group_by(iso, country_name, GCAM_region_ID, year, fuel) %>%
mutate(frac_sector = value / sum(value)) %>%
ungroup()
# complete nesting so that there are values for all sectors, fuels, and years, even if 0
all_IEA_years <- unique(IEA_ind_sectors_region_fuel$year)
all_IEA_fuels <- unique(IEA_ind_sectors_region_fuel$fuel)
IEA_ind_sectors_region_fuel <- IEA_ind_sectors_region_fuel %>%
complete(nesting(iso, country_name, GCAM_region_ID),
year = all_IEA_years,
FLOW = flows_sel,
fuel = all_IEA_fuels) %>%
mutate(value = replace_na(value, 0),
frac_sector = replace_na(frac_sector, 0))
# aggregate to GCAM region level
IEA_ind_sectors_GCAM_region_fuel <- IEA_ind_sectors_region_fuel %>%
group_by(GCAM_region_ID, year, fuel, FLOW) %>%
summarize(value = sum(value)) %>%
ungroup() %>%
full_join(GCAM_region_names) %>%
group_by(GCAM_region_ID, year, fuel) %>%
mutate(frac_sector = value / sum(value)) %>%
ungroup()
# get the regions and fuels with all their energy in INONSPEC in all years
country_fuels_all_inonspec <- IEA_ind_sectors_region_fuel %>%
filter(FLOW == "INONSPEC") %>%
group_by(iso, country_name, GCAM_region_ID, fuel) %>%
filter(all(frac_sector == 1)) %>%
dplyr::select(iso, country_name, GCAM_region_ID, fuel) %>%
distinct()
# aggregate to total energy
IEA_ind_sectors_region_total_en <- IEA_ind_sectors_region_fuel %>%
group_by(iso, country_name, GCAM_region_ID, year, FLOW) %>%
summarize(value = sum(value)) %>%
group_by(iso, country_name, GCAM_region_ID, year) %>%
mutate(frac = value / sum(value)) %>%
ungroup()
# aggregate to GCAM region
IEA_ind_sectors_GCAM_region_total_en <- IEA_ind_sectors_region_total_en %>%
group_by(GCAM_region_ID, year, FLOW) %>%
summarize(value = sum(value)) %>%
group_by(GCAM_region_ID, year) %>%
mutate(frac = value / sum(value)) %>%
ungroup()
Get food consumption from the FAO data. This is modified from the zchunk_LA100.FAO_SUA_PrimaryEquivalent.R processing code to process into GCAM commodities and calories, but does not do some infilling of data (using the global average values for macronutrient rates for regions missing those data) and does not exclude NEC (not otherwise classified) calorie data from the total food calories.
# modified from zchunk_LA100.FAO_SUA_PrimaryEquivalent.R - processing into GCAM commodities and calories, but not doing some infilling of data with global averages that is done in the chunk
# 2010-2019 data first
Mapping_SUA_PrimaryEquivalent %>%
select(GCAM_commodity, item = source_item) %>%
bind_rows(Mapping_SUA_PrimaryEquivalent %>%
select(GCAM_commodity, item = sink_item)) %>%
distinct() %>% arrange(GCAM_commodity) ->
SUA_Items_GCAM
SUA_item_code_map %>%
filter(!item %in% unique(SUA_Items_GCAM$item)) -> SUA_Items_NonGCAM
SUA_item_code_map %>%
filter(item_code %in% unique(macronutrientrate_2010_2019_mean$item_code)) %>%
left_join(SUA_Items_GCAM, by = "item") %>%
# For NA GCAM_commodity: not elsewhere classified (NEC)
# So we would know % of food calories not included in GCAM commodities
mutate(GCAM_commodity = if_else(is.na(GCAM_commodity), "NEC", GCAM_commodity)) ->
SUA_Items_Food
# get world average macronutrient rate
macronutrientrate_2010_2019_mean %>% tidyr::gather(macronutrient, macronutrient_value, calperg:proteinperc) %>%
group_by(item, item_code, macronutrient) %>%
summarise(macronutrient_value_World = mean(macronutrient_value), .groups = "drop") %>%
ungroup() ->
SUA_food_macronutrient_rate_World
# calculate SUA food Calories consumption by joining macronutrient rates and SUA food - adapted from zchunk_LA100.FAO_SUA_PrimaryEquivalent.R, not using world average for regions with missing data
fao_sua_2010_2019 %>%
filter(element == "Food", item_code %in% SUA_Items_Food$item_code) %>%
# ensure we at least have a complete series across time otherwise it may
# throw off moving avg calculations
complete(area_code = Area_Region_Map$area_code, year = pull(., year) %>% unique(), nesting(item_code, element), fill=list(value=0)) %>%
rename(Food_Kt = value) %>%
select(-element) %>%
gcamdata::left_join_error_no_match(SUA_Items_Food, by = c("item_code")) %>%
gcamdata::repeat_add_columns(
tibble(macronutrient = c("calperg", "fatperc", "proteinperc"))) %>%
left_join(macronutrientrate_2010_2019_mean %>%
tidyr::gather(macronutrient, macronutrient_value, calperg:proteinperc),
by = c("area_code", "item_code", "item", "macronutrient")) %>%
# gcamdata::left_join_error_no_match(SUA_food_macronutrient_rate_World,
# by = c("item_code", "item", "macronutrient")) %>%
# mutate(macronutrient_value = if_else(is.na(macronutrient_value),
# macronutrient_value_World,
# macronutrient_value)) %>%
# calculate total Cal, protein and fat in food
# value was in 1000 ton or 10^ 9 g
mutate(value = macronutrient_value * Food_Kt,
value = if_else(macronutrient %in% c("fatperc", "proteinperc"),
value / 100 /1000, value)) %>% # unit from perc to Mt
#select(-macronutrient_value, -macronutrient_value_World, -Food_Kt) %>%
select(-macronutrient_value) %>%
# rename element with units
mutate(macronutrient = case_when(
macronutrient == "calperg" ~ "MKcal",
macronutrient == "fatperc" ~ "MtFat",
macronutrient == "proteinperc" ~ "MtProtein" )) %>%
gcamdata::left_join_error_no_match(Area_Region_Map %>% select(-region), by = "area_code") ->
FAO_Food_Macronutrient_All_2010_2019
# calculate macronutrient rates from the aggregated values - aggregated to GCAM commodities
macronutrientrate_agg_GCAM_commod_2010_2019 <- FAO_Food_Macronutrient_All_2010_2019 %>%
pivot_wider(names_from = macronutrient, values_from = value) %>%
# in this calculation, we only want to include food that has an associated calorie content (to not have too high of a denominator)
mutate(Food_Kt_w_calorie = if_else(is.na(MKcal), 0, Food_Kt)) %>%
group_by(area_code, area, year, GCAM_commodity, iso, GCAM_region_ID) %>%
summarize(Food_Kt = sum(Food_Kt_w_calorie),
MKcal = sum(MKcal, na.rm = TRUE),
MtFat = sum(MtFat, na.rm = TRUE),
MtProtein = sum(MtProtein, na.rm = TRUE)) %>%
mutate(calperg = MKcal / Food_Kt,
fatperc = MtFat / Food_Kt * 100 * 1000,
proteinperc = MtProtein / Food_Kt * 100 * 1000)
# aggregate to mean of all years
macronutrientrate_agg_GCAM_commod_2010_2019_mean <- macronutrientrate_agg_GCAM_commod_2010_2019 %>%
group_by(area_code, area, GCAM_commodity, iso, GCAM_region_ID) %>%
summarize(Food_Kt_sum = sum(Food_Kt),
MKcal_sum = sum(MKcal),
MtFat_sum = sum(MtFat),
MtProtein_sum = sum(MtProtein),
calperg_mean = mean(calperg, na.rm = TRUE),
fatperc = mean(fatperc, na.rm = TRUE),
proteinperc = mean(proteinperc, na.rm = TRUE),
calperg_mean_weighted = MKcal_sum / Food_Kt_sum) %>%
ungroup()
# aggregate to total calories
FAO_Food_Macronutrient_All_2010_2019_total <- FAO_Food_Macronutrient_All_2010_2019 %>%
# get Food_Kt both including and not including food that has no associated calorie content
mutate(Food_Kt_w_macronutrient = if_else(is.na(value), 0, Food_Kt)) %>%
group_by(area, year, iso, GCAM_region_ID, macronutrient) %>%
summarize(macronutrient_value = sum(value, na.rm = TRUE),
Food_Kt = sum(Food_Kt),
Food_Kt_w_macronutrient = sum(Food_Kt_w_macronutrient)) %>%
ungroup() %>%
mutate(year = as.numeric(year))
# aggregate to staples, meat, and other non-staples
FAO_Food_Macronutrient_All_2010_2019_total_cats <- FAO_Food_Macronutrient_All_2010_2019 %>%
left_join(GCAM_commodity_type_mapping %>% dplyr::select(GCAM_commodity, commodity_type_broad)) %>%
# get Food_Kt both including and not including food that has no associated calorie content
mutate(Food_Kt_w_macronutrient = if_else(is.na(value), 0, Food_Kt)) %>%
group_by(area, year, iso, GCAM_region_ID, commodity_type_broad, macronutrient) %>%
summarize(macronutrient_value = sum(value, na.rm = TRUE),
Food_Kt = sum(Food_Kt),
Food_Kt_w_macronutrient = sum(Food_Kt_w_macronutrient)) %>%
ungroup() %>%
mutate(year = as.numeric(year))
# aggregate by GCAM commodity
FAO_Food_Macronutrient_All_2010_2019_commodity <- FAO_Food_Macronutrient_All_2010_2019 %>%
# get Food_Kt both including and not including food that has no associated calorie content
mutate(Food_Kt_w_macronutrient = if_else(is.na(value), 0, Food_Kt)) %>%
group_by(area, year, iso, GCAM_region_ID, GCAM_commodity, macronutrient) %>%
summarize(macronutrient_value = sum(value, na.rm = TRUE),
Food_Kt = sum(Food_Kt),
Food_Kt_w_macronutrient = sum(Food_Kt_w_macronutrient)) %>%
ungroup() %>%
mutate(year = as.numeric(year))
# data before 2010
fao_fbsh_1973_2009 %>%
gcamdata::gather_years() %>%
filter(year < min(fao_sua_2010_2019$year)) %>%
filter(!is.na(value)) ->
FBSH_CB
Mapping_item_FBS_GCAM %>%
select(item_code, GCAM_commodity)%>%
filter(!is.na(GCAM_commodity)) %>%
left_join(FBSH_CB %>%
# complete element
complete(nesting(area, area_code, item_code, item, year), element,
fill = list(value = 0)),
by = "item_code") %>%
dplyr::group_by_at(vars(-value, -item, -item_code)) %>%
summarise(value = sum(value), .groups = "drop") %>%
gcamdata::left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>%
gcamdata::left_join_error_no_match(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") %>%
gcamdata::left_join_error_no_match(GCAM_region_names, by = "GCAM_region_ID") %>%
#dplyr::group_by_at(vars(area = region, year, GCAM_commodity, element)) %>%
dplyr::group_by_at(vars(area, region, GCAM_region_ID, year, GCAM_commodity, element, iso, unit)) %>%
summarise(value = sum(value), .groups = "drop") ->
FBSH_CB_reg
# get just food
FBSH_CB_reg_food <- FBSH_CB_reg %>%
filter(element == "Food" & !is.na(unit)) %>%
mutate(unit = "Kt")
# convert to calories using the (weighted) mean of the conversion rate from the 2010-2019 data
FBSH_CB_reg_food_cal <- FBSH_CB_reg_food %>%
left_join(macronutrientrate_agg_GCAM_commod_2010_2019_mean %>%
dplyr::select(area, iso, GCAM_region_ID, GCAM_commodity, calperg = calperg_mean_weighted),
by = c("area", "GCAM_region_ID", "iso", "GCAM_commodity")) %>%
mutate(MKcal = value * calperg)
# aggregate to total calories
FBSH_CB_reg_food_cal_total <- FBSH_CB_reg_food_cal %>%
# get Food_Kt both including and not including food that has no associated calorie content
mutate(Food_Kt_w_calorie = if_else(is.na(MKcal), 0, value)) %>%
group_by(area, year, iso, GCAM_region_ID, region) %>%
summarize(MKcal = sum(MKcal, na.rm = TRUE),
Food_Kt = sum(value),
Food_Kt_w_calorie = sum(Food_Kt_w_calorie)) %>%
ungroup()
# aggregate to staples, meat, and other non-staples
FBSH_CB_reg_food_cal_total_cats <- FBSH_CB_reg_food_cal %>%
left_join(GCAM_commodity_type_mapping %>% dplyr::select(GCAM_commodity, commodity_type_broad)) %>%
# get Food_Kt both including and not including food that has no associated calorie content
mutate(Food_Kt_w_calorie = if_else(is.na(MKcal), 0, value)) %>%
group_by(area, year, iso, GCAM_region_ID, region, commodity_type_broad) %>%
summarize(MKcal = sum(MKcal, na.rm = TRUE),
Food_Kt = sum(value),
Food_Kt_w_calorie = sum(Food_Kt_w_calorie)) %>%
ungroup()
# combine the pre-2010 and 2010-2019 data
total_cal_food_kt_all_years <- FAO_Food_Macronutrient_All_2010_2019_total %>%
pivot_wider(names_from = macronutrient, values_from = macronutrient_value) %>%
dplyr::select(area, year, iso, GCAM_region_ID, Food_Kt, Food_Kt_w_calorie = Food_Kt_w_macronutrient, MKcal) %>%
rbind(FBSH_CB_reg_food_cal_total %>%
dplyr::select(area, year, iso, GCAM_region_ID, MKcal, Food_Kt, Food_Kt_w_calorie)) %>%
mutate(frac_Food_Kt_w_calorie = Food_Kt_w_calorie / Food_Kt,
calperg_food_w_calorie = MKcal / Food_Kt_w_calorie) %>%
arrange(area, year)
total_cal_food_kt_all_years_cats <- FAO_Food_Macronutrient_All_2010_2019_total_cats %>%
pivot_wider(names_from = macronutrient, values_from = macronutrient_value) %>%
dplyr::select(area, year, iso, GCAM_region_ID, Food_Kt, Food_Kt_w_calorie = Food_Kt_w_macronutrient, MKcal, commodity_type_broad) %>%
rbind(FBSH_CB_reg_food_cal_total_cats %>%
dplyr::select(area, year, iso, GCAM_region_ID, MKcal, Food_Kt, Food_Kt_w_calorie, commodity_type_broad)) %>%
mutate(frac_Food_Kt_w_calorie = Food_Kt_w_calorie / Food_Kt,
calperg_food_w_calorie = MKcal / Food_Kt_w_calorie) %>%
arrange(area, year, commodity_type_broad)
# combine the GCAM-commodity level data
total_cal_food_kt_all_years_commodity <- FAO_Food_Macronutrient_All_2010_2019_commodity %>%
pivot_wider(names_from = macronutrient, values_from = macronutrient_value) %>%
dplyr::select(area, year, iso, GCAM_region_ID, Food_Kt, Food_Kt_w_calorie = Food_Kt_w_macronutrient, MKcal, GCAM_commodity) %>%
rbind(FBSH_CB_reg_food_cal %>%
# get Food_Kt both including and not including food that has no associated calorie content
mutate(Food_Kt_w_calorie = if_else(is.na(MKcal), 0, value)) %>%
dplyr::select(area, year, iso, GCAM_region_ID, MKcal, Food_Kt = value, Food_Kt_w_calorie, GCAM_commodity)) %>%
mutate(frac_Food_Kt_w_calorie = Food_Kt_w_calorie / Food_Kt,
calperg_food_w_calorie = MKcal / Food_Kt_w_calorie) %>%
arrange(area, year, GCAM_commodity)
# also obtain animal feed in production units
fao_sua_2010_2019 %>%
filter(element == "Feed", item_code %in% SUA_Items_Food$item_code) %>%
rename(Feed_Kt = value) %>%
select(-element) %>%
gcamdata::left_join_error_no_match(SUA_Items_Food, by = c("item_code")) %>%
gcamdata::left_join_error_no_match(Area_Region_Map %>% select(-region), by = "area_code") ->
FAO_Feed_Production_All_2010_2019
# aggregate to total feed production by region
total_feed_kt_2010_2019 <- FAO_Feed_Production_All_2010_2019 %>%
group_by(area_code, area, year, iso, GCAM_region_ID) %>%
summarize(Feed_Kt = sum(Feed_Kt)) %>%
mutate(year = as.numeric(year)) %>%
ungroup()
# get pre-2010 data for feed
FBSH_CB_reg_feed <- FBSH_CB_reg %>%
filter(element == "Feed" & !is.na(unit)) %>%
mutate(unit = "Kt")
total_feed_kt_till_2010 <- FBSH_CB_reg_feed %>%
group_by(area, year, iso, GCAM_region_ID) %>%
summarize(Feed_Kt = sum(value)) %>%
ungroup()
# join
total_feed_kt_all_years <- total_feed_kt_2010_2019 %>%
dplyr::select(area, year, iso, GCAM_region_ID, Feed_Kt) %>%
rbind(total_feed_kt_till_2010 %>%
dplyr::select(area, year, iso, GCAM_region_ID, Feed_Kt)) %>%
arrange(area, year)
Now using the gcamdata calorie consumption data, L101.CropMeat_Food_Pcal_R_C_Y. These data use the global average values for macronutrient rate to infill for regions that are missing those data, as well as exclude NEC (not otherwise classified) calorie data from the total food calories. Aggregate these data to total calories and calories of staples and nonstaples.
# aggregate into total calories
total_cal_food_gcamdata_GCAM_reg <- L101.CropMeat_Food_Pcal_R_C_Y %>%
group_by(GCAM_region_ID, year) %>%
summarize(PKcal = sum(value) / 1000) %>%
ungroup()
# also get calories from staples and nonstaples
staples_ns_cal_food_gcamdata_GCAM_reg <- L101.CropMeat_Food_Pcal_R_C_Y %>%
left_join(GCAM_commodity_type_mapping %>% dplyr::select(GCAM_commodity, commodity_type_broad)) %>%
mutate(commodity_type_broad = ifelse(grepl("nonstaples", commodity_type_broad), "nonstaples", commodity_type_broad)) %>%
group_by(GCAM_region_ID, year, commodity_type_broad) %>%
summarize(PKcal = sum(value) / 1000) %>%
ungroup() %>%
pivot_wider(names_from = commodity_type_broad, values_from = PKcal)
Plotting energy use by each of the industry sectors in the IEA data, as well as specifically food processing and other non-specified industry energy. These figures are saved but not printed below.
# plot energy use by industry sectors
fig <- ggplot(IEA_ind_sectors_region_total_en,
aes(x = year, y = value, color = FLOW)) +
geom_line() +
scale_color_manual(values = c("gray20", "red", "firebrick4", "deepskyblue1", "dodgerblue3",
"olivedrab", "darkgreen", "palegreen", "peachpuff2", "pink", "mediumpurple",
"gold1", "orange"), name = "sector") +
facet_wrap(~country_name, scale = "free") +
labs(x = "", y = "EJ", title = "Industry energy use (IEA)") +
theme(axis.text.x = element_text(size=5)) +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
ggsave(paste0(fig_dir, "/ind_energy_use_by_sector_country_hist_IEA.png"), fig, width = 35, height = 20)
# plot fractions in food processing and non-specified industry
fig <- ggplot(IEA_ind_sectors_region_total_en %>% filter(FLOW %in% c("INONSPEC", "FOODPRO")),
aes(x = year, y = frac, color = FLOW)) +
geom_line() +
geom_hline(yintercept = 0.5, linetype = "dashed", color = "slategray") +
scale_color_manual(values = c("firebrick4", "deepskyblue1"), name = "sector") +
facet_wrap(~country_name, scale = "free_x") +
labs(x = "", y = "Fraction", title = "Fraction of total industry energy use in non-specified industry and food processing (IEA)") +
theme(axis.text.x = element_text(size=5)) +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
ggsave(paste0(fig_dir, "/ind_energy_use_frac_inonspec_foodpro_country_hist_IEA.png"), fig, width = 35, height = 20)
# replot at a GCAM region level
fig <- ggplot(IEA_ind_sectors_GCAM_region_total_en %>%
left_join(GCAM_region_names),
aes(x = year, y = value, color = FLOW)) +
geom_line() +
scale_color_manual(values = c("gray20", "red", "firebrick4", "deepskyblue1", "dodgerblue3",
"olivedrab", "darkgreen", "palegreen", "peachpuff2", "pink", "mediumpurple",
"gold1", "orange"), name = "sector") +
facet_wrap(~region, scale = "free") +
labs(x = "", y = "EJ", title = "Industry energy use (IEA)") +
theme(axis.text.x = element_text(size=5)) +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
ggsave(paste0(fig_dir, "/ind_energy_use_by_sector_GCAM_reg_hist_IEA.png"), fig, width = 35, height = 20)
fig <- ggplot(IEA_ind_sectors_GCAM_region_total_en %>% filter(FLOW %in% c("INONSPEC", "FOODPRO")) %>%
left_join(GCAM_region_names),
aes(x = year, y = frac, color = FLOW)) +
geom_line() +
geom_hline(yintercept = 0.5, linetype = "dashed", color = "slategray") +
scale_color_manual(values = c("firebrick4", "deepskyblue1"), name = "sector") +
facet_wrap(~region, scale = "free_x") +
labs(x = "", y = "Fraction", title = "Fraction of total industry energy use in non-specified industry and food processing (IEA)") +
theme(axis.text.x = element_text(size=5)) +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
ggsave(paste0(fig_dir, "/ind_energy_use_frac_inonspec_foodpro_GCAM_reg_hist_IEA.png"), fig, width = 35, height = 20)
# plot the share of industry energy in food processing for just the top 10 regions with the largest shares in 2015
sel_data <- IEA_ind_sectors_GCAM_region_total_en %>%
filter(FLOW == "FOODPRO" & year == 2015) %>%
left_join(GCAM_region_names) %>%
arrange(desc(frac)) %>%
head(10) %>%
mutate(percent = frac * 100)
sel_data <- sel_data %>% mutate(region = factor(region, levels = unique(sel_data$region)))
fig <- ggplot(sel_data,
aes(x = region, y = percent)) +
geom_col(fill = I("#00931d")) +
labs(x = "", y = "Percent", title = "Share of total manufacturing energy used in food processing\nfor the top 10 regions in 2015, IEA data") +
theme_classic() +
theme(text = element_text(size = 24),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
ggsave(paste0(fig_dir, "/ind_energy_frac_foodpro_GCAM_reg_2015_top_10_IEA.png"), fig, width = 12, height = 8)
# plot for all regions
sel_data_2 <- IEA_ind_sectors_GCAM_region_total_en %>%
filter(FLOW == "FOODPRO" & year == 2015) %>%
left_join(GCAM_region_names) %>%
arrange(desc(frac)) %>%
mutate(percent = frac * 100)
sel_data_2 <- sel_data_2 %>% mutate(region = factor(region, levels = unique(sel_data_2$region)))
fig <- ggplot(sel_data_2,
aes(x = region, y = percent)) +
geom_col(fill = I("#00931d")) +
labs(x = "", y = "Percent", title = "Share of total manufacturing energy used in food processing in 2015 (IEA raw data)") +
scale_y_continuous(breaks = c(0, 5, 10, 15, 20, 25, 30)) +
theme_classic() +
theme(text = element_text(size = 12),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
ggsave(paste0(fig_dir, "/ind_energy_frac_foodpro_GCAM_reg_2015_IEA.png"), fig, width = 12, height = 8)
Food processing energy use by fuel in the IEA data.
fig <- ggplot(IEA_ind_sectors_region_fuel %>% filter(FLOW == "FOODPRO"),
aes(x = year, y = value, fill = fuel)) +
geom_col() +
facet_wrap(~country_name, scale = "free_y") +
scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) +
labs(x = "", y = "EJ", title = "Food processing energy use (IEA)") +
theme(axis.text.x = element_text(size=5)) +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
ggsave(paste0(fig_dir, "/foodpro_energy_use_by_country_fuel_hist_IEA_all.png"), fig, width = 35, height = 20)
# also plot all data at GCAM region level
fig <- ggplot(IEA_ind_sectors_GCAM_region_fuel %>% filter(FLOW == "FOODPRO"),
aes(x = year, y = value, fill = fuel)) +
geom_col() +
facet_wrap(~region, scale = "free_y") +
scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) +
labs(x = "", y = "EJ", title = "Food processing energy use (IEA)") +
theme(axis.text.x = element_text(size=5)) +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
ggsave(paste0(fig_dir, "/foodpro_energy_use_by_GCAM_region_fuel_hist_IEA_all.png"), fig, width = 35, height = 20)
fig <- ggplot(IEA_ind_sectors_GCAM_region_fuel %>% filter(FLOW == "FOODPRO" & year == 2015),
aes(x = region, y = value, fill = fuel)) +
geom_col() +
scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) +
labs(x = "", y = "EJ", title = "Food processing energy use in 2015 (IEA raw data)") +
theme_classic() +
theme(text = element_text(size = 12),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
ggsave(paste0(fig_dir, "/foodpro_energy_use_by_GCAM_region_fuel_hist_IEA_all_2015.png"), fig, width = 12, height = 8)
Also plot non-specified industry energy use by fuel in the IEA data for GCAM regions.
fig <- ggplot(IEA_ind_sectors_GCAM_region_fuel %>% filter(FLOW == "INONSPEC"),
aes(x = year, y = value, fill = fuel)) +
geom_col() +
facet_wrap(~region, scale = "free_y") +
scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) +
labs(x = "", y = "EJ", title = "Nonspecified industry energy use (IEA)") +
theme(axis.text.x = element_text(size=5)) +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
ggsave(paste0(fig_dir, "/inonspec_energy_use_by_GCAM_region_fuel_hist_IEA_all.png"), fig, width = 35, height = 20)
Identify the data to use in calculating these relationships: regions and years from 1990 onwards in which the fraction of total industry energy in non-specified industry energy use is less than 0.5 and the fraction in food processing is greater than 0.01, or even if the non-specified industry fraction is high, the food processing fraction is greater than 0.1. These criteria are used because if the IEA does not know the industrial sector breakdown of use of a particular fuel for a given country, it just categorizes all of that fuel use as non-specified industry fuel use. Thus, regions with high fractions of their industry energy use in non-specified industry likely do not have accurate energy use profiles for the other industrial sectors; this is particularly a problem for food processing, as we expect there to be at least some energy used for food processing in all regions.
IEA_ind_sectors_GCAM_region_total_en_frac_wider <- IEA_ind_sectors_GCAM_region_total_en %>%
dplyr::select(-value) %>%
pivot_wider(names_from = FLOW, values_from = frac) %>%
left_join(GCAM_region_names)
# get regions and years to include
GCAM_reg_years_incl <- IEA_ind_sectors_GCAM_region_total_en_frac_wider %>%
# select only desired years past the first start year in which INONSPEC fraction is not too high and food processing fraction is not too low, or in which food processing fraction is high even if INONSPEC fraction is high
filter(year >= min_year & ((INONSPEC < max_frac_inonspec & (!is.na(FOODPRO) & FOODPRO > min_frac_foodpro)) | FOODPRO > override_frac_foodpro)) %>%
dplyr::select(GCAM_region_ID, year) %>%
left_join(GCAM_region_names)
fig <- ggplot(IEA_ind_sectors_GCAM_region_fuel %>% filter(FLOW == "FOODPRO") %>% right_join(GCAM_reg_years_incl),
aes(x = year, y = value, fill = fuel)) +
geom_col() +
facet_wrap(~region, scale = "free_y") +
scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) +
labs(x = "", y = "EJ", title = "Food processing energy use (IEA), selected data") +
theme(axis.text.x = element_text(size=5)) +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
fig
ggsave(paste0(fig_dir, "/foodpro_energy_use_by_GCAM_region_fuel_hist_IEA_sel.png"), fig + theme(text = element_text(size = 30)), width = 35, height = 20)
Join the energy data to the food data and filter to just the regions and years that satisfy the criteria for “higher quality” data, that we will use to calculate the relationships between food processing energy use and food consumption.
IEA_foodpro_region_total_en <- IEA_ind_sectors_region_total_en %>%
filter(FLOW == "FOODPRO")
# combine with energy and GDP data on a GCAM-region level
comb_data_sel_GCAM_reg_gcamdata <- IEA_foodpro_region_total_en %>%
group_by(GCAM_region_ID, year) %>%
summarize(EJ = sum(value)) %>%
ungroup() %>%
full_join(total_cal_food_gcamdata_GCAM_reg) %>%
full_join(staples_ns_cal_food_gcamdata_GCAM_reg) %>%
left_join(GDP_pcap_gcamdata_sel) %>%
left_join(GDP_gcamdata_sel) %>%
filter(year %in% all_IEA_years) %>%
left_join(GCAM_region_names)
# filter to the data to include in the relationships calculation
comb_data_sel_GCAM_reg_gcamdata_final <- comb_data_sel_GCAM_reg_gcamdata %>%
right_join(GCAM_reg_years_incl) %>%
filter(!is.na(PKcal),
PKcal > 0)
# filter to the data to infill
comb_data_sel_GCAM_reg_gcamdata_to_infill <- comb_data_sel_GCAM_reg_gcamdata %>%
anti_join(GCAM_reg_years_incl)
Test various models and relationships.
# function to get relevant data from a linear model and print the summary
get_lm_data <- function(lm_model) {
print(summary(lm_model))
coefs_all <- data.frame(coef = unname(lm_model$coefficients),
pval = unname(summary(lm_model)$coef[,4]),
label = names(lm_model$coefficients)) %>%
mutate(region = ifelse(grepl("region", label), substr(label, 7, nchar(label)), NA))
coefs_input_vars <- coefs_all %>%
filter(is.na(region)) %>%
dplyr::select(-region) %>%
mutate(is_sig = pval <= max_pval)
regional_intercepts <- coefs_all %>%
filter(!is.na(region)) %>%
dplyr::select(-label) %>%
rename(intercept = coef) %>%
# include a variable indicating the intercept only if the intercept is significant
mutate(intercept_if_sig = ifelse(pval <= max_pval, intercept, 0))
return(list(coefs_input_vars, regional_intercepts))
}
# function to run a variety of test linear models and use their results to estimate food processing energy use data
# used_data is the data to actually use in calculating the relationships, infill_data is the data for regions/years to infill
run_models_calc_infill <- function(used_data, infill_data) {
# run the function on the models to test to get their data
# models with total calories
m_PKcal_1 <- get_lm_data(lm(EJ ~ 0 + PKcal, used_data))
PKcal_slope_1 <- (m_PKcal_1[[1]] %>% filter(label == "PKcal"))$coef[1]
m_PKcal_2 <- get_lm_data(lm(EJ ~ PKcal, used_data))
PKcal_slope_2 <- (m_PKcal_2[[1]] %>% filter(label == "PKcal"))$coef[1]
overall_intercept_PKcal_2 <- (m_PKcal_2[[1]] %>% filter(label == "(Intercept)"))$coef[1]
overall_intercept_PKcal_sig_2 <- (m_PKcal_2[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]
m_PKcal_3 <- get_lm_data(lm(EJ ~ 0 + PKcal + region, used_data))
PKcal_slope_3 <- (m_PKcal_3[[1]] %>% filter(label == "PKcal"))$coef[1]
region_intercepts_PKcal_3 <- m_PKcal_3[[2]] %>% dplyr::select(region, intercept, intercept_if_sig) %>%
rename_with(~paste0(.x, "_PKcal_3", recycle0 = TRUE), contains("intercept"))
m_PKcal_4 <- get_lm_data(lm(EJ ~ PKcal + region, used_data))
PKcal_slope_4 <- (m_PKcal_4[[1]] %>% filter(label == "PKcal"))$coef[1]
overall_intercept_PKcal_4 <- (m_PKcal_4[[1]] %>% filter(label == "(Intercept)"))$coef[1]
overall_intercept_PKcal_sig_4 <- (m_PKcal_4[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]
region_intercepts_PKcal_4 <- m_PKcal_4[[2]] %>% dplyr::select(region, intercept, intercept_if_sig) %>%
rename_with(~paste0(.x, "_PKcal_4", recycle0 = TRUE), contains("intercept"))
m_PKcal_5 <- get_lm_data(lm(EJ ~ PKcal + GDP, used_data))
PKcal_slope_5 <- (m_PKcal_5[[1]] %>% filter(label == "PKcal"))$coef[1]
GDP_slope_5 <- (m_PKcal_5[[1]] %>% filter(label == "GDP"))$coef[1]
overall_intercept_PKcal_5 <- (m_PKcal_5[[1]] %>% filter(label == "(Intercept)"))$coef[1]
overall_intercept_PKcal_sig_5 <- (m_PKcal_5[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]
m_PKcal_6 <- get_lm_data(lm(EJ ~ PKcal + GDP + region, used_data))
PKcal_slope_6 <- (m_PKcal_6[[1]] %>% filter(label == "PKcal"))$coef[1]
GDP_slope_6 <- (m_PKcal_6[[1]] %>% filter(label == "GDP"))$coef[1]
overall_intercept_PKcal_6 <- (m_PKcal_6[[1]] %>% filter(label == "(Intercept)"))$coef[1]
overall_intercept_PKcal_sig_6 <- (m_PKcal_6[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]
region_intercepts_PKcal_6 <- m_PKcal_6[[2]] %>% dplyr::select(region, intercept, intercept_if_sig) %>%
rename_with(~paste0(.x, "_PKcal_6", recycle0 = TRUE), contains("intercept"))
# models with nonstaples calories only
m_nonstaples_1 <- get_lm_data(lm(EJ ~ 0 + nonstaples, used_data))
nonstaples_slope_1 <- (m_nonstaples_1[[1]] %>% filter(label == "nonstaples"))$coef[1]
m_nonstaples_2 <- get_lm_data(lm(EJ ~ nonstaples, used_data))
nonstaples_slope_2 <- (m_nonstaples_2[[1]] %>% filter(label == "nonstaples"))$coef[1]
overall_intercept_nonstaples_2 <- (m_nonstaples_2[[1]] %>% filter(label == "(Intercept)"))$coef[1]
overall_intercept_nonstaples_sig_2 <- (m_nonstaples_2[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]
m_nonstaples_3 <- get_lm_data(lm(EJ ~ 0 + nonstaples + region, used_data))
nonstaples_slope_3 <- (m_nonstaples_3[[1]] %>% filter(label == "nonstaples"))$coef[1]
region_intercepts_nonstaples_3 <- m_nonstaples_3[[2]] %>% dplyr::select(region, intercept, intercept_if_sig) %>%
rename_with(~paste0(.x, "_nonstaples_3", recycle0 = TRUE), contains("intercept"))
m_nonstaples_4 <- get_lm_data(lm(EJ ~ nonstaples + region, used_data))
nonstaples_slope_4 <- (m_nonstaples_4[[1]] %>% filter(label == "nonstaples"))$coef[1]
overall_intercept_nonstaples_4 <- (m_nonstaples_4[[1]] %>% filter(label == "(Intercept)"))$coef[1]
overall_intercept_nonstaples_sig_4 <- (m_nonstaples_4[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]
region_intercepts_nonstaples_4 <- m_nonstaples_4[[2]] %>% dplyr::select(region, intercept, intercept_if_sig) %>%
rename_with(~paste0(.x, "_nonstaples_4", recycle0 = TRUE), contains("intercept"))
m_nonstaples_5 <- get_lm_data(lm(EJ ~ nonstaples + GDP, used_data))
nonstaples_slope_5 <- (m_nonstaples_5[[1]] %>% filter(label == "nonstaples"))$coef[1]
GDP_slope_5 <- (m_nonstaples_5[[1]] %>% filter(label == "GDP"))$coef[1]
overall_intercept_nonstaples_5 <- (m_nonstaples_5[[1]] %>% filter(label == "(Intercept)"))$coef[1]
overall_intercept_nonstaples_sig_5 <- (m_nonstaples_5[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]
m_nonstaples_6 <- get_lm_data(lm(EJ ~ nonstaples + GDP + region, used_data))
nonstaples_slope_6 <- (m_nonstaples_6[[1]] %>% filter(label == "nonstaples"))$coef[1]
GDP_slope_6 <- (m_nonstaples_6[[1]] %>% filter(label == "GDP"))$coef[1]
overall_intercept_nonstaples_6 <- (m_nonstaples_6[[1]] %>% filter(label == "(Intercept)"))$coef[1]
overall_intercept_nonstaples_sig_6 <- (m_nonstaples_6[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]
region_intercepts_nonstaples_6 <- m_nonstaples_6[[2]] %>% dplyr::select(region, intercept, intercept_if_sig) %>%
rename_with(~paste0(.x, "_nonstaples_6", recycle0 = TRUE), contains("intercept"))
# models with both staples and nonstaples calories
m_nonstaples_staples_1 <- get_lm_data(lm(EJ ~ 0 + nonstaples + staples, used_data))
nonstaples_staples_ns_slope_1 <- (m_nonstaples_staples_1[[1]] %>% filter(label == "nonstaples"))$coef[1]
nonstaples_staples_s_slope_1 <- (m_nonstaples_staples_1[[1]] %>% filter(label == "staples"))$coef[1]
m_nonstaples_staples_2 <- get_lm_data(lm(EJ ~ nonstaples + staples, used_data))
nonstaples_staples_ns_slope_2 <- (m_nonstaples_staples_2[[1]] %>% filter(label == "nonstaples"))$coef[1]
nonstaples_staples_s_slope_2 <- (m_nonstaples_staples_2[[1]] %>% filter(label == "staples"))$coef[1]
overall_intercept_nonstaples_staples_2 <- (m_nonstaples_staples_2[[1]] %>% filter(label == "(Intercept)"))$coef[1]
# calculate the food processing energy use based on each of these models, both for regions with good data (to see how the models compare) and for regions without good data
hist_and_infilled_test_methods <- infill_data %>%
mutate(orig_data = FALSE) %>%
rbind(used_data %>% mutate(orig_data = TRUE)) %>%
left_join(region_intercepts_PKcal_3) %>%
left_join(region_intercepts_PKcal_4) %>%
left_join(region_intercepts_PKcal_6) %>%
left_join(region_intercepts_nonstaples_3) %>%
left_join(region_intercepts_nonstaples_4) %>%
left_join(region_intercepts_nonstaples_6) %>%
mutate(across(contains("intercept"), ~ replace_na(.x, 0))) %>%
mutate(energy_est_PKcal_1 = PKcal * PKcal_slope_1,
energy_est_PKcal_2 = PKcal * PKcal_slope_2 + overall_intercept_PKcal_2,
energy_est_PKcal_3 = PKcal * PKcal_slope_3 + intercept_PKcal_3,
energy_est_PKcal_4 = PKcal * PKcal_slope_4 + overall_intercept_PKcal_4 + intercept_PKcal_4,
energy_est_PKcal_5 = PKcal * PKcal_slope_5 + GDP * GDP_slope_5 + overall_intercept_PKcal_5,
energy_est_PKcal_6 = PKcal * PKcal_slope_6 + GDP * GDP_slope_6 + overall_intercept_PKcal_6 + intercept_PKcal_6,
energy_est_nonstaples_1 = nonstaples * nonstaples_slope_1,
energy_est_nonstaples_2 = nonstaples * nonstaples_slope_2 + overall_intercept_nonstaples_2,
energy_est_nonstaples_3 = nonstaples * nonstaples_slope_3 + intercept_nonstaples_3,
energy_est_nonstaples_4 = nonstaples * nonstaples_slope_4 + overall_intercept_nonstaples_4 + intercept_nonstaples_4,
energy_est_nonstaples_5 = nonstaples * nonstaples_slope_5 + GDP * GDP_slope_5 + overall_intercept_nonstaples_5,
energy_est_nonstaples_6 = nonstaples * nonstaples_slope_6 + GDP * GDP_slope_6 + overall_intercept_nonstaples_6 + intercept_nonstaples_6,
energy_est_nonstaples_staples_1 = nonstaples * nonstaples_staples_ns_slope_1 + staples * nonstaples_staples_s_slope_1,
energy_est_nonstaples_staples_2 = nonstaples * nonstaples_staples_ns_slope_2 + staples * nonstaples_staples_s_slope_2 + overall_intercept_nonstaples_staples_2)
return(hist_and_infilled_test_methods)
}
# just going to infill from the minimum year onwards for now
hist_and_infilled_test_methods_gcamdata <- run_models_calc_infill(comb_data_sel_GCAM_reg_gcamdata_final %>%
rename(GDP = GDP_mil90USD), comb_data_sel_GCAM_reg_gcamdata_to_infill %>%
filter(year >= min_year) %>%
rename(GDP = GDP_mil90USD)) %>%
rename(GDP_mil90USD = GDP)
##
## Call:
## lm(formula = EJ ~ 0 + PKcal, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.83401 -0.01201 0.01547 0.12236 0.83721
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 1.20918 0.03442 35.13 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2698 on 435 degrees of freedom
## Multiple R-squared: 0.7394, Adjusted R-squared: 0.7388
## F-statistic: 1234 on 1 and 435 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ PKcal, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.72022 -0.12001 -0.09853 0.02513 0.78285
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.12081 0.01426 8.471 3.81e-16 ***
## PKcal 1.03461 0.03799 27.231 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2502 on 434 degrees of freedom
## Multiple R-squared: 0.6308, Adjusted R-squared: 0.63
## F-statistic: 741.5 on 1 and 434 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ 0 + PKcal + region, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63328 -0.01452 0.00111 0.02238 0.29543
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 2.2590434 0.1040050 21.721 < 2e-16
## regionArgentina 0.0104071 0.1020965 0.102 0.9189
## regionAustralia_NZ 0.0849380 0.0201679 4.212 3.11e-05
## regionBrazil 0.2702093 0.0276846 9.760 < 2e-16
## regionCanada -0.0332550 0.0310324 -1.072 0.2845
## regionCentral Asia -0.1646734 0.0727576 -2.263 0.0241
## regionChina -1.8350921 0.1410253 -13.012 < 2e-16
## regionColombia -0.0243059 0.0203270 -1.196 0.2325
## regionEU-12 -0.0504720 0.0235024 -2.148 0.0323
## regionEU-15 0.0161341 0.0507530 0.318 0.7507
## regionEurope_Eastern -0.0459866 0.0301992 -1.523 0.1286
## regionEurope_Non_EU -0.1692711 0.0229329 -7.381 8.65e-13
## regionEuropean Free Trade Association -0.0004314 0.0200577 -0.022 0.9829
## regionJapan -0.0357243 0.0232517 -1.536 0.1252
## regionMexico -0.1515020 0.0230081 -6.585 1.38e-10
## regionRussia 0.0151043 0.0262003 0.576 0.5646
## regionSouth America_Northern -0.0356232 0.0205331 -1.735 0.0835
## regionSouth Korea -0.0399617 0.0206218 -1.938 0.0533
## regionSoutheast Asia -0.3526609 0.0376423 -9.369 < 2e-16
## regionTaiwan -0.0248699 0.0201395 -1.235 0.2176
## regionUSA 0.1857237 0.0423708 4.383 1.48e-05
##
## PKcal ***
## regionArgentina
## regionAustralia_NZ ***
## regionBrazil ***
## regionCanada
## regionCentral Asia *
## regionChina ***
## regionColombia
## regionEU-12 *
## regionEU-15
## regionEurope_Eastern
## regionEurope_Non_EU ***
## regionEuropean Free Trade Association
## regionJapan
## regionMexico ***
## regionRussia
## regionSouth America_Northern .
## regionSouth Korea .
## regionSoutheast Asia ***
## regionTaiwan
## regionUSA ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.102 on 415 degrees of freedom
## Multiple R-squared: 0.9645, Adjusted R-squared: 0.9627
## F-statistic: 536.6 on 21 and 415 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ PKcal + region, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63328 -0.01452 0.00111 0.02238 0.29543
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.010407 0.102097 0.102 0.918858
## PKcal 2.259043 0.104005 21.721 < 2e-16 ***
## regionAustralia_NZ 0.074531 0.103942 0.717 0.473749
## regionBrazil 0.259802 0.104858 2.478 0.013622 *
## regionCanada -0.043662 0.106507 -0.410 0.682058
## regionCentral Asia -0.175080 0.124973 -1.401 0.161977
## regionChina -1.845499 0.169972 -10.858 < 2e-16 ***
## regionColombia -0.034713 0.103922 -0.334 0.738526
## regionEU-12 -0.060879 0.104165 -0.584 0.559236
## regionEU-15 0.005727 0.111914 0.051 0.959212
## regionEurope_Eastern -0.056394 0.106146 -0.531 0.595507
## regionEurope_Non_EU -0.179678 0.104093 -1.726 0.085068 .
## regionEuropean Free Trade Association -0.010839 0.103972 -0.104 0.917026
## regionJapan -0.046131 0.104132 -0.443 0.657991
## regionMexico -0.161909 0.104102 -1.555 0.120639
## regionRussia 0.004697 0.104633 0.045 0.964215
## regionSouth America_Northern -0.046030 0.104024 -0.442 0.658360
## regionSouth Korea -0.050369 0.103912 -0.485 0.628127
## regionSoutheast Asia -0.363068 0.107352 -3.382 0.000788 ***
## regionTaiwan -0.035277 0.103947 -0.339 0.734499
## regionUSA 0.175317 0.108806 1.611 0.107879
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.102 on 415 degrees of freedom
## Multiple R-squared: 0.9414, Adjusted R-squared: 0.9385
## F-statistic: 333.1 on 20 and 415 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ PKcal + GDP, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.48117 -0.06333 -0.03162 0.04319 0.68040
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.593e-02 9.609e-03 3.739 0.00021 ***
## PKcal 7.717e-01 2.613e-02 29.532 < 2e-16 ***
## GDP 7.458e-08 2.919e-09 25.550 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1582 on 433 degrees of freedom
## Multiple R-squared: 0.8528, Adjusted R-squared: 0.8521
## F-statistic: 1254 on 2 and 433 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ PKcal + GDP + region, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.45897 -0.01763 0.00077 0.02210 0.29694
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.861e-02 9.091e-02 0.315 0.75316
## PKcal 1.263e+00 1.327e-01 9.513 < 2e-16
## GDP 1.009e-07 9.640e-09 10.472 < 2e-16
## regionAustralia_NZ 9.002e-03 9.274e-02 0.097 0.92272
## regionBrazil 3.093e-01 9.347e-02 3.309 0.00102
## regionCanada -1.314e-01 9.519e-02 -1.381 0.16814
## regionCentral Asia -1.258e-01 1.114e-01 -1.129 0.25936
## regionChina -7.837e-01 1.821e-01 -4.302 2.11e-05
## regionColombia -3.334e-02 9.251e-02 -0.360 0.71873
## regionEU-12 -2.970e-02 9.278e-02 -0.320 0.74906
## regionEU-15 -5.004e-01 1.107e-01 -4.519 8.13e-06
## regionEurope_Eastern -2.308e-02 9.455e-02 -0.244 0.80728
## regionEurope_Non_EU -1.394e-01 9.275e-02 -1.503 0.13371
## regionEuropean Free Trade Association -7.401e-02 9.276e-02 -0.798 0.42541
## regionJapan -3.116e-01 9.610e-02 -3.242 0.00128
## regionMexico -1.317e-01 9.272e-02 -1.421 0.15611
## regionRussia 5.558e-02 9.327e-02 0.596 0.55157
## regionSouth America_Northern -5.779e-02 9.261e-02 -0.624 0.53293
## regionSouth Korea -7.418e-02 9.253e-02 -0.802 0.42318
## regionSoutheast Asia -1.432e-01 9.785e-02 -1.464 0.14397
## regionTaiwan -5.267e-02 9.255e-02 -0.569 0.56960
## regionUSA -3.553e-01 1.093e-01 -3.250 0.00125
##
## (Intercept)
## PKcal ***
## GDP ***
## regionAustralia_NZ
## regionBrazil **
## regionCanada
## regionCentral Asia
## regionChina ***
## regionColombia
## regionEU-12
## regionEU-15 ***
## regionEurope_Eastern
## regionEurope_Non_EU
## regionEuropean Free Trade Association
## regionJapan **
## regionMexico
## regionRussia
## regionSouth America_Northern
## regionSouth Korea
## regionSoutheast Asia
## regionTaiwan
## regionUSA **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09078 on 414 degrees of freedom
## Multiple R-squared: 0.9536, Adjusted R-squared: 0.9513
## F-statistic: 405.5 on 21 and 414 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ 0 + nonstaples, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.72557 -0.01664 0.00731 0.08833 0.57891
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## nonstaples 2.9998 0.0536 55.97 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1845 on 435 degrees of freedom
## Multiple R-squared: 0.8781, Adjusted R-squared: 0.8778
## F-statistic: 3132 on 1 and 435 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ nonstaples, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.67429 -0.07662 -0.05151 0.04074 0.54457
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.06500 0.01044 6.227 1.12e-09 ***
## nonstaples 2.76976 0.06331 43.751 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.177 on 434 degrees of freedom
## Multiple R-squared: 0.8152, Adjusted R-squared: 0.8148
## F-statistic: 1914 on 1 and 434 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ 0 + nonstaples + region, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63050 -0.01336 0.00128 0.01962 0.25856
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## nonstaples 2.636533 0.118167 22.312 < 2e-16 ***
## regionArgentina 0.043457 0.100565 0.432 0.665874
## regionAustralia_NZ 0.098035 0.019808 4.949 1.08e-06 ***
## regionBrazil 0.413260 0.023194 17.817 < 2e-16 ***
## regionCanada -0.008711 0.030452 -0.286 0.774972
## regionCentral Asia -0.072501 0.071268 -1.017 0.309601
## regionChina -0.133161 0.062795 -2.121 0.034550 *
## regionColombia 0.005662 0.019833 0.285 0.775426
## regionEU-12 0.053605 0.021038 2.548 0.011195 *
## regionEU-15 0.278114 0.039014 7.129 4.53e-12 ***
## regionEurope_Eastern 0.019494 0.029240 0.667 0.505345
## regionEurope_Non_EU -0.045674 0.020434 -2.235 0.025935 *
## regionEuropean Free Trade Association 0.006866 0.019746 0.348 0.728227
## regionJapan 0.078740 0.020729 3.798 0.000167 ***
## regionMexico -0.037015 0.020588 -1.798 0.072914 .
## regionRussia 0.177063 0.022105 8.010 1.17e-14 ***
## regionSouth America_Northern -0.012984 0.020143 -0.645 0.519554
## regionSouth Korea 0.016224 0.019854 0.817 0.414301
## regionSoutheast Asia 0.049630 0.024254 2.046 0.041357 *
## regionTaiwan -0.011025 0.019784 -0.557 0.577656
## regionUSA 0.385012 0.033779 11.398 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1005 on 415 degrees of freedom
## Multiple R-squared: 0.9655, Adjusted R-squared: 0.9637
## F-statistic: 552.9 on 21 and 415 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ nonstaples + region, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63050 -0.01336 0.00128 0.01962 0.25856
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.043457 0.100565 0.432 0.66587
## nonstaples 2.636533 0.118167 22.312 < 2e-16 ***
## regionAustralia_NZ 0.054578 0.102430 0.533 0.59444
## regionBrazil 0.369804 0.102792 3.598 0.00036 ***
## regionCanada -0.052168 0.104975 -0.497 0.61948
## regionCentral Asia -0.115958 0.123107 -0.942 0.34678
## regionChina -0.176618 0.116799 -1.512 0.13126
## regionColombia -0.037795 0.102427 -0.369 0.71232
## regionEU-12 0.010148 0.102493 0.099 0.92118
## regionEU-15 0.234658 0.106777 2.198 0.02853 *
## regionEurope_Eastern -0.023963 0.104609 -0.229 0.81892
## regionEurope_Non_EU -0.089130 0.102437 -0.870 0.38475
## regionEuropean Free Trade Association -0.036591 0.102445 -0.357 0.72114
## regionJapan 0.035283 0.102461 0.344 0.73076
## regionMexico -0.080472 0.102449 -0.785 0.43262
## regionRussia 0.133606 0.102687 1.301 0.19395
## regionSouth America_Northern -0.056441 0.102518 -0.551 0.58224
## regionSouth Korea -0.027233 0.102425 -0.266 0.79047
## regionSoutheast Asia 0.006173 0.103037 0.060 0.95225
## regionTaiwan -0.054481 0.102434 -0.532 0.59510
## regionUSA 0.341556 0.105184 3.247 0.00126 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1005 on 415 degrees of freedom
## Multiple R-squared: 0.943, Adjusted R-squared: 0.9403
## F-statistic: 343.4 on 20 and 415 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ nonstaples + GDP, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.49336 -0.06300 -0.03302 0.04267 0.58707
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.564e-02 8.657e-03 4.117 4.59e-05 ***
## nonstaples 2.184e+00 6.408e-02 34.079 < 2e-16 ***
## GDP 4.620e-08 3.039e-09 15.199 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1431 on 433 degrees of freedom
## Multiple R-squared: 0.8795, Adjusted R-squared: 0.8789
## F-statistic: 1580 on 2 and 433 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ nonstaples + GDP + region, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.46273 -0.01677 0.00076 0.02120 0.26115
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.672e-02 9.006e-02 0.519 0.60417
## nonstaples 1.519e+00 1.525e-01 9.958 < 2e-16
## GDP 9.774e-08 9.609e-09 10.171 < 2e-16
## regionAustralia_NZ -2.453e-04 9.189e-02 -0.003 0.99787
## regionBrazil 3.701e-01 9.206e-02 4.021 6.9e-05
## regionCanada -1.335e-01 9.435e-02 -1.415 0.15792
## regionCentral Asia -9.359e-02 1.103e-01 -0.849 0.39653
## regionChina 1.350e-01 1.090e-01 1.239 0.21622
## regionColombia -3.506e-02 9.173e-02 -0.382 0.70247
## regionEU-12 9.742e-03 9.179e-02 0.106 0.91553
## regionEU-15 -3.552e-01 1.118e-01 -3.176 0.00160
## regionEurope_Eastern -5.563e-03 9.370e-02 -0.059 0.95269
## regionEurope_Non_EU -8.889e-02 9.174e-02 -0.969 0.33315
## regionEuropean Free Trade Association -8.660e-02 9.188e-02 -0.943 0.34646
## regionJapan -2.567e-01 9.614e-02 -2.670 0.00788
## regionMexico -8.621e-02 9.175e-02 -0.940 0.34798
## regionRussia 1.275e-01 9.196e-02 1.386 0.16636
## regionSouth America_Northern -6.324e-02 9.181e-02 -0.689 0.49136
## regionSouth Korea -6.009e-02 9.178e-02 -0.655 0.51299
## regionSoutheast Asia 6.075e-02 9.243e-02 0.657 0.51135
## regionTaiwan -6.300e-02 9.174e-02 -0.687 0.49265
## regionUSA -2.449e-01 1.104e-01 -2.217 0.02713
##
## (Intercept)
## nonstaples ***
## GDP ***
## regionAustralia_NZ
## regionBrazil ***
## regionCanada
## regionCentral Asia
## regionChina
## regionColombia
## regionEU-12
## regionEU-15 **
## regionEurope_Eastern
## regionEurope_Non_EU
## regionEuropean Free Trade Association
## regionJapan **
## regionMexico
## regionRussia
## regionSouth America_Northern
## regionSouth Korea
## regionSoutheast Asia
## regionTaiwan
## regionUSA *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09001 on 414 degrees of freedom
## Multiple R-squared: 0.9544, Adjusted R-squared: 0.9521
## F-statistic: 412.8 on 21 and 414 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ 0 + nonstaples + staples, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.70081 -0.02672 0.00531 0.07790 0.55103
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## nonstaples 4.28248 0.10547 40.60 <2e-16 ***
## staples -1.06518 0.07918 -13.45 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1552 on 434 degrees of freedom
## Multiple R-squared: 0.9139, Adjusted R-squared: 0.9135
## F-statistic: 2305 on 2 and 434 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ nonstaples + staples, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.60898 -0.07214 -0.03738 0.04320 0.51852
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.051990 0.008874 5.858 9.26e-09 ***
## nonstaples 4.038462 0.109841 36.766 < 2e-16 ***
## staples -1.015324 0.076776 -13.224 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1496 on 433 degrees of freedom
## Multiple R-squared: 0.8683, Adjusted R-squared: 0.8677
## F-statistic: 1428 on 2 and 433 DF, p-value: < 2.2e-16
Compare to using per capita GDP and log of GDP in the models, just showing that total GDP is a better predictor.
summary(lm(EJ ~ PKcal + GDP_pcap_thous90USD, comb_data_sel_GCAM_reg_gcamdata_final))
##
## Call:
## lm(formula = EJ ~ PKcal + GDP_pcap_thous90USD, data = comb_data_sel_GCAM_reg_gcamdata_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.69990 -0.10570 -0.06653 0.04900 0.68536
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0312329 0.0192851 1.62 0.106
## PKcal 1.0796270 0.0369212 29.24 < 2e-16 ***
## GDP_pcap_thous90USD 0.0056956 0.0008683 6.56 1.54e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2389 on 433 degrees of freedom
## Multiple R-squared: 0.6642, Adjusted R-squared: 0.6626
## F-statistic: 428.2 on 2 and 433 DF, p-value: < 2.2e-16
summary(lm(EJ ~ PKcal + GDP_pcap_thous90USD + region, comb_data_sel_GCAM_reg_gcamdata_final))
##
## Call:
## lm(formula = EJ ~ PKcal + GDP_pcap_thous90USD + region, data = comb_data_sel_GCAM_reg_gcamdata_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.59439 -0.03092 0.00313 0.03839 0.29984
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.051684 0.100844 -0.513 0.60856
## PKcal 2.146238 0.104861 20.467 < 2e-16 ***
## GDP_pcap_thous90USD 0.009621 0.002167 4.440 1.16e-05 ***
## regionAustralia_NZ -0.144253 0.112986 -1.277 0.20241
## regionBrazil 0.281827 0.102691 2.744 0.00633 **
## regionCanada -0.283871 0.117394 -2.418 0.01603 *
## regionCentral Asia -0.130503 0.122659 -1.064 0.28797
## regionChina -1.650144 0.171989 -9.594 < 2e-16 ***
## regionColombia -0.003863 0.101893 -0.038 0.96978
## regionEU-12 -0.048614 0.101931 -0.477 0.63366
## regionEU-15 -0.113197 0.112702 -1.004 0.31578
## regionEurope_Eastern -0.007978 0.104402 -0.076 0.93912
## regionEurope_Non_EU -0.157040 0.101950 -1.540 0.12424
## regionEuropean Free Trade Association -0.407524 0.135375 -3.010 0.00277 **
## regionJapan -0.241000 0.110915 -2.173 0.03036 *
## regionMexico -0.143076 0.101920 -1.404 0.16112
## regionRussia 0.029302 0.102501 0.286 0.77512
## regionSouth America_Northern -0.040234 0.101764 -0.395 0.69278
## regionSouth Korea -0.089201 0.102021 -0.874 0.38244
## regionSoutheast Asia -0.282947 0.106550 -2.656 0.00822 **
## regionTaiwan -0.063842 0.101883 -0.627 0.53125
## regionUSA -0.004658 0.113890 -0.041 0.96740
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09975 on 414 degrees of freedom
## Multiple R-squared: 0.944, Adjusted R-squared: 0.9412
## F-statistic: 332.4 on 21 and 414 DF, p-value: < 2.2e-16
summary(lm(EJ ~ PKcal + log(GDP_mil90USD), comb_data_sel_GCAM_reg_gcamdata_final))
##
## Call:
## lm(formula = EJ ~ PKcal + log(GDP_mil90USD), data = comb_data_sel_GCAM_reg_gcamdata_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.52183 -0.10943 -0.01533 0.10631 0.55149
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.986116 0.104855 -18.94 <2e-16 ***
## PKcal 0.728334 0.031231 23.32 <2e-16 ***
## log(GDP_mil90USD) 0.159725 0.007911 20.19 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1798 on 433 degrees of freedom
## Multiple R-squared: 0.8098, Adjusted R-squared: 0.809
## F-statistic: 922 on 2 and 433 DF, p-value: < 2.2e-16
summary(lm(EJ ~ PKcal + log(GDP_pcap_thous90USD), comb_data_sel_GCAM_reg_gcamdata_final))
##
## Call:
## lm(formula = EJ ~ PKcal + log(GDP_pcap_thous90USD), data = comb_data_sel_GCAM_reg_gcamdata_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.66717 -0.12554 -0.05791 0.08729 0.65957
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.15176 0.02894 -5.244 2.46e-07 ***
## PKcal 1.16130 0.03605 32.217 < 2e-16 ***
## log(GDP_pcap_thous90USD) 0.11435 0.01090 10.492 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2236 on 433 degrees of freedom
## Multiple R-squared: 0.7056, Adjusted R-squared: 0.7043
## F-statistic: 519 on 2 and 433 DF, p-value: < 2.2e-16
summary(lm(EJ ~ PKcal + log(GDP_mil90USD) + region, comb_data_sel_GCAM_reg_gcamdata_final))
##
## Call:
## lm(formula = EJ ~ PKcal + log(GDP_mil90USD) + region, data = comb_data_sel_GCAM_reg_gcamdata_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.62861 -0.02104 0.00191 0.02300 0.29182
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.42350 0.31788 -1.332 0.18351
## PKcal 2.10088 0.15111 13.903 < 2e-16 ***
## log(GDP_mil90USD) 0.03499 0.02428 1.441 0.15031
## regionAustralia_NZ 0.04163 0.10629 0.392 0.69549
## regionBrazil 0.23551 0.10607 2.220 0.02694 *
## regionCanada -0.08974 0.11107 -0.808 0.41958
## regionCentral Asia -0.16200 0.12514 -1.295 0.19619
## regionChina -1.70756 0.19488 -8.762 < 2e-16 ***
## regionColombia -0.01151 0.10503 -0.110 0.91276
## regionEU-12 -0.07727 0.10465 -0.738 0.46072
## regionEU-15 -0.05048 0.11838 -0.426 0.67004
## regionEurope_Eastern -0.02414 0.10835 -0.223 0.82383
## regionEurope_Non_EU -0.18525 0.10403 -1.781 0.07568 .
## regionEuropean Free Trade Association -0.03931 0.10570 -0.372 0.71013
## regionJapan -0.12215 0.11661 -1.047 0.29548
## regionMexico -0.17540 0.10439 -1.680 0.09365 .
## regionRussia -0.01280 0.10520 -0.122 0.90319
## regionSouth America_Northern -0.02778 0.10466 -0.265 0.79077
## regionSouth Korea -0.06800 0.10450 -0.651 0.51558
## regionSoutheast Asia -0.34438 0.10799 -3.189 0.00154 **
## regionTaiwan -0.02588 0.10402 -0.249 0.80367
## regionUSA 0.10781 0.11833 0.911 0.36280
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1018 on 414 degrees of freedom
## Multiple R-squared: 0.9416, Adjusted R-squared: 0.9387
## F-statistic: 318.1 on 21 and 414 DF, p-value: < 2.2e-16
summary(lm(EJ ~ PKcal + log(GDP_pcap_thous90USD) + region, comb_data_sel_GCAM_reg_gcamdata_final))
##
## Call:
## lm(formula = EJ ~ PKcal + log(GDP_pcap_thous90USD) + region,
## data = comb_data_sel_GCAM_reg_gcamdata_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63251 -0.01546 0.00103 0.02196 0.29382
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.011989 0.115215 -0.104 0.9172
## PKcal 2.209666 0.156830 14.090 < 2e-16 ***
## log(GDP_pcap_thous90USD) 0.012725 0.030227 0.421 0.6740
## regionAustralia_NZ 0.055239 0.113690 0.486 0.6273
## regionBrazil 0.267943 0.106729 2.511 0.0124 *
## regionCanada -0.063324 0.116394 -0.544 0.5867
## regionCentral Asia -0.161690 0.129078 -1.253 0.2110
## regionChina -1.762410 0.260582 -6.763 4.61e-11 ***
## regionColombia -0.026919 0.105660 -0.255 0.7990
## regionEU-12 -0.056146 0.104873 -0.535 0.5927
## regionEU-15 0.009854 0.112453 0.088 0.9302
## regionEurope_Eastern -0.040679 0.112618 -0.361 0.7181
## regionEurope_Non_EU -0.172988 0.105401 -1.641 0.1015
## regionEuropean Free Trade Association -0.036877 0.121067 -0.305 0.7608
## regionJapan -0.060505 0.109685 -0.552 0.5815
## regionMexico -0.156326 0.105046 -1.488 0.1375
## regionRussia 0.012970 0.106565 0.122 0.9032
## regionSouth America_Northern -0.045496 0.104135 -0.437 0.6624
## regionSouth Korea -0.055533 0.104736 -0.530 0.5962
## regionSoutheast Asia -0.331653 0.130828 -2.535 0.0116 *
## regionTaiwan -0.040065 0.104670 -0.383 0.7021
## regionUSA 0.172524 0.109115 1.581 0.1146
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1021 on 414 degrees of freedom
## Multiple R-squared: 0.9414, Adjusted R-squared: 0.9384
## F-statistic: 316.6 on 21 and 414 DF, p-value: < 2.2e-16
Indeed, comparing these models with the previous results, total GDP seems to be a better predictor than log(GDP) or per capita GDP or log(per capita GDP).
Plot the results.
First looking at relationships with total calories, just historical data first. Methods, as labeled by number, are: 1) Linear model with calories (all or nonstaples) and no intercept 2) Linear model with calories (all or nonstaples) with intercept 3) Linear model with calories (all or nonstaples) and no intercept but with regional fixed effects 4) Linear model with calories (all or nonstaples) with intercept and regional fixed effects 5) Linear model with calories (all or nonstaples) and GDP with intercept 6) Linear model with calories (all or nonstaples) and GDP with intercept and regional fixed effects
plot_ly(width = 1000) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE),
x = ~PKcal,
y = ~EJ,
type = "scatter",
mode = "markers",
color = ~region,
name = ~paste(region, "orig"),
text = "Original data",
marker = list(line = list(color = "black", width = 1))) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE),
x = ~PKcal,
y = ~energy_est_PKcal_1,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
text = "Est 1",
marker = list(symbol = "cross")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE),
x = ~PKcal,
y = ~energy_est_PKcal_2,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 2",
marker = list(symbol = "square")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE),
x = ~PKcal,
y = ~energy_est_PKcal_3,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 3",
marker = list(symbol = "diamond")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE),
x = ~PKcal,
y = ~energy_est_PKcal_4,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 4",
marker = list(symbol = "star")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE),
x = ~PKcal,
y = ~energy_est_PKcal_5,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 5",
marker = list(symbol = "triangle-down")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE),
x = ~PKcal,
y = ~energy_est_PKcal_6,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 6",
marker = list(symbol = "triangle-up")) %>%
layout(xaxis = list(title = "PKcal"),
yaxis = list(title = "EJ"),
title = "Energy used in food processing vs calorie consumption")
Generally the fixed effects models are a lot better. GDP does seem to help the predictions, as well. Raw intercept (not regionally-varying) does not.
Now historical data with calories from non-staples as the predictor.
plot_ly(width = 1000) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE),
x = ~nonstaples,
y = ~EJ,
type = "scatter",
mode = "markers",
color = ~region,
name = ~paste(region, "orig"),
text = "Original data",
marker = list(line = list(color = "black", width = 1))) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE),
x = ~nonstaples,
y = ~energy_est_nonstaples_1,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
text = "Est 1",
marker = list(symbol = "cross")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE),
x = ~nonstaples,
y = ~energy_est_nonstaples_2,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 2",
marker = list(symbol = "square")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE),
x = ~nonstaples,
y = ~energy_est_nonstaples_3,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 3",
marker = list(symbol = "diamond")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE),
x = ~nonstaples,
y = ~energy_est_nonstaples_4,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 4",
marker = list(symbol = "star")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE),
x = ~nonstaples,
y = ~energy_est_nonstaples_5,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 5",
marker = list(symbol = "triangle-down")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE),
x = ~nonstaples,
y = ~energy_est_nonstaples_6,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 6",
marker = list(symbol = "triangle-up")) %>%
layout(xaxis = list(title = "PKcal from non-staples"),
yaxis = list(title = "EJ"),
title = "Energy used in food processing vs non-staples consumption")
Now plot just for the regions to infill, compared to the historical data for the other regions.
plot_ly(width = 1000) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE),
x = ~PKcal,
y = ~EJ,
type = "scatter",
mode = "markers",
color = ~region,
name = ~paste(region, "orig"),
text = "Original data",
marker = list(line = list(color = "black", width = 1))) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE),
x = ~PKcal,
y = ~energy_est_PKcal_1,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
text = "Est 1",
marker = list(symbol = "cross")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE),
x = ~PKcal,
y = ~energy_est_PKcal_2,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 2",
marker = list(symbol = "square")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE),
x = ~PKcal,
y = ~energy_est_PKcal_3,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 3",
marker = list(symbol = "diamond")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE),
x = ~PKcal,
y = ~energy_est_PKcal_4,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 4",
marker = list(symbol = "star")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE),
x = ~PKcal,
y = ~energy_est_PKcal_5,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 5",
marker = list(symbol = "triangle-down")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE),
x = ~PKcal,
y = ~energy_est_PKcal_6,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 6",
marker = list(symbol = "triangle-up")) %>%
layout(xaxis = list(title = "PKcal"),
yaxis = list(title = "EJ"),
title = "Energy used in food processing vs calorie consumption")
plot_ly(width = 1000) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE),
x = ~nonstaples,
y = ~EJ,
type = "scatter",
mode = "markers",
color = ~region,
name = ~paste(region, "orig"),
text = "Original data",
marker = list(line = list(color = "black", width = 1))) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE),
x = ~nonstaples,
y = ~energy_est_nonstaples_1,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
text = "Est 1",
marker = list(symbol = "cross")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE),
x = ~nonstaples,
y = ~energy_est_nonstaples_2,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 2",
marker = list(symbol = "square")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE),
x = ~nonstaples,
y = ~energy_est_nonstaples_3,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 3",
marker = list(symbol = "diamond")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE),
x = ~nonstaples,
y = ~energy_est_nonstaples_4,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 4",
marker = list(symbol = "star")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE),
x = ~nonstaples,
y = ~energy_est_nonstaples_5,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 5",
marker = list(symbol = "triangle-down")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE),
x = ~nonstaples,
y = ~energy_est_nonstaples_6,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 6",
marker = list(symbol = "triangle-up")) %>%
layout(xaxis = list(title = "PKcal from non-staples"),
yaxis = list(title = "EJ"),
title = "Energy used in food processing vs non-staples consumption")
# plot the nonstaples methods again but with x axis total calories
plot_ly(width = 1000) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE),
x = ~PKcal,
y = ~EJ,
type = "scatter",
mode = "markers",
color = ~region,
name = ~paste(region, "orig"),
text = "Original data",
marker = list(line = list(color = "black", width = 1))) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE),
x = ~PKcal,
y = ~energy_est_nonstaples_1,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
text = "Est 1",
marker = list(symbol = "cross")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE),
x = ~PKcal,
y = ~energy_est_nonstaples_2,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 2",
marker = list(symbol = "square")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE),
x = ~PKcal,
y = ~energy_est_nonstaples_3,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 3",
marker = list(symbol = "diamond")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE),
x = ~PKcal,
y = ~energy_est_nonstaples_4,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 4",
marker = list(symbol = "star")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE),
x = ~PKcal,
y = ~energy_est_nonstaples_5,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 5",
marker = list(symbol = "triangle-down")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == FALSE),
x = ~PKcal,
y = ~energy_est_nonstaples_6,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
showlegend = FALSE,
text = "Est 6",
marker = list(symbol = "triangle-up")) %>%
layout(xaxis = list(title = "PKcal"),
yaxis = list(title = "EJ"),
title = "Energy used in food processing vs calorie consumption (infill calculated with nonstaples)")
From these, for PKcal, infilling without regional fixed effects seems like it makes more sense, or with GDP (with or without fixed effects).
Now plot the predicted values relative to the observed historical values.
hist_and_infilled_test_methods_gcamdata_longer <- hist_and_infilled_test_methods_gcamdata %>%
dplyr::select(GCAM_region_ID, year, EJ, PKcal, nonstaples, staples, scenario, GDP_pcap_thous90USD, GDP_mil90USD, region, orig_data, energy_est_PKcal_1, energy_est_PKcal_2, energy_est_PKcal_3, energy_est_PKcal_4, energy_est_PKcal_5, energy_est_PKcal_6, energy_est_nonstaples_1, energy_est_nonstaples_2, energy_est_nonstaples_3, energy_est_nonstaples_4, energy_est_nonstaples_5, energy_est_nonstaples_6) %>%
pivot_longer(cols = c(energy_est_PKcal_1, energy_est_PKcal_2, energy_est_PKcal_3, energy_est_PKcal_4, energy_est_PKcal_5, energy_est_PKcal_6, energy_est_nonstaples_1, energy_est_nonstaples_2, energy_est_nonstaples_3, energy_est_nonstaples_4, energy_est_nonstaples_5, energy_est_nonstaples_6),
names_to = "label",
values_to = "value") %>%
mutate(EJ_per_PKcal_est = value / PKcal,
EJ_per_PKcal = EJ / PKcal)
pal_pred_plot <- c("darkred", "darkorange", "goldenrod", "gold", "darkgreen", "green4", "blue4", "lightskyblue", "purple4", "purple", "plum", "pink", "black", "darkgray")
# loop through and plot
for (region_name in unique(hist_and_infilled_test_methods_gcamdata$region)) {
# select data for this region
data_sel <- hist_and_infilled_test_methods_gcamdata_longer %>% filter(region == region_name)
# get min and max values for the plot
x_min <- min(data_sel$PKcal) - 0.5 * min(data_sel$PKcal)
x_max <- max(data_sel$PKcal) + 0.5 * max(data_sel$PKcal)
y_min <- min(data_sel$value) - 0.5 * min(data_sel$value)
y_max <- max(data_sel$value) + 0.5 * max(data_sel$value)
ggplot() +
geom_point(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE & region != region_name) %>% mutate(label = "original data, other countries"),
aes(x = PKcal, y = EJ, color = label)) +
geom_point(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE & region == region_name) %>% mutate(label = "original data"),
aes(x = PKcal, y = EJ, color = label)) +
geom_point(data = data_sel,
aes(x = PKcal, y = value, color = label)) +
scale_color_manual(values = pal_pred_plot) +
xlim(x_min, x_max) +
ylim(y_min, y_max) +
labs(x = "PKcal", y = "EJ", title = paste0(region_name, " predicted food processing energy use vs calorie consumption"))
ggsave(paste0(fig_dir, "/food_pro_EJ_pred_comp_methods_", region_name, ".png"), width = 15, height = 10)
}
# plot infilled data
plot_ly(width = 1000) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata %>% filter(orig_data == TRUE),
x = ~PKcal,
y = ~EJ,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(line = list(color = I("black"), width = 1),
synbol = "circle")) %>%
add_trace(data = hist_and_infilled_test_methods_gcamdata_longer,
x = ~PKcal,
y = ~value,
color = ~region,
symbol = ~label,
type = "scatter",
mode = "markers") %>%
layout(xaxis = list(title = "PKcal"),
yaxis = list(title = "EJ"),
title = "Food processing energy use vs food consumption, historical and estimated data with various methods,\nhistorical outlined in black")
Compare the estimated energy per calorie coefficients, with the various methods, for the regions with data to infill relative to the regions with historical data that we used. Just looking at the average coefficients over the whole time series and the 2015 values.
infilled_test_methods_gcamdata_avg_coef <- hist_and_infilled_test_methods_gcamdata_longer %>%
group_by(GCAM_region_ID, region, label) %>%
mutate(EJ_per_PKcal_est_mean = mean(EJ_per_PKcal_est, na.rm = TRUE)) %>%
ungroup()
infilled_test_methods_gcamdata_avg_coef_sel <- infilled_test_methods_gcamdata_avg_coef %>%
dplyr::select(region, EJ_per_PKcal_mean = EJ_per_PKcal_est_mean, label) %>%
distinct()
hist_gcamdata_avg_coef <- hist_and_infilled_test_methods_gcamdata %>%
filter(orig_data == TRUE) %>%
mutate(EJ_per_PKcal = EJ / PKcal) %>%
group_by(GCAM_region_ID, region) %>%
mutate(EJ_per_PKcal_mean = mean(EJ_per_PKcal)) %>%
ungroup()
hist_gcamdata_avg_coef_sel <- hist_gcamdata_avg_coef %>%
dplyr::select(region, EJ_per_PKcal_mean) %>%
distinct() %>%
mutate(label = "historical")
hist_infilled_test_methods_gcamdata_avg_coef_sel <- rbind(infilled_test_methods_gcamdata_avg_coef_sel,
hist_gcamdata_avg_coef_sel)
# plot average
plot_ly(width = 1000) %>%
add_trace(data = hist_gcamdata_avg_coef_sel,
x = ~label,
y = ~EJ_per_PKcal_mean,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(line = list(color = I("black"), width = 1),
synbol = "circle")) %>%
add_trace(data = infilled_test_methods_gcamdata_avg_coef_sel,
x = ~label,
y = ~EJ_per_PKcal_mean,
color = ~region,
type = "scatter",
mode = "markers",
name = ~paste0(region, " estimated")) %>%
layout(xaxis = list(title = "Method"),
yaxis = list(title = "EJ per PKcal"),
title = "Average food processing energy use coefficient, historical and estimated data with various methods,\nhistorical outlined in black")
plot_ly(width = 1000) %>%
add_trace(data = hist_gcamdata_avg_coef_sel,
x = ~region,
y = ~EJ_per_PKcal_mean,
color = ~label,
colors = c("saddlebrown", "sandybrown", "red", "darkorange", "gold3", "yellowgreen", "darkgreen", "royalblue", "slateblue4", "mediumpurple", "orchid", "plum", "black"),
type = "scatter",
mode = "markers",
marker = list(size = 7)) %>%
add_trace(data = infilled_test_methods_gcamdata_avg_coef_sel,
x = ~region,
y = ~EJ_per_PKcal_mean,
color = ~label,
type = "scatter",
mode = "markers",
marker = list(size = 5)) %>%
layout(xaxis = list(title = ""),
yaxis = list(title = "EJ per PKcal"),
title = "Average food processing energy use coefficient, historical and estimated data with various methods")
fig <- ggplot(hist_infilled_test_methods_gcamdata_avg_coef_sel,
aes(x = label, y = EJ_per_PKcal_mean)) +
geom_point(size = 4) +
facet_wrap(~region, ncol = 6) +
labs(x = "Method", y = "EJ per PKcal", title = "Average food processing energy use coefficient, historical and estimated data with various methods") +
theme_bw() +
theme(text = element_text(size = 24),
strip.background = element_blank(),
axis.text.x = element_text(angle=90, vjust = 1, hjust = 1))
ggsave(paste0(fig_dir, "/coefs_avg_infilled_test_methods_1.png"), fig, width = 35, height = 20)
fig <- ggplot(hist_infilled_test_methods_gcamdata_avg_coef_sel,
aes(x = region, y = EJ_per_PKcal_mean, color = label)) +
geom_point(size = 4) +
scale_color_manual(values = c("saddlebrown", "sandybrown", "red", "darkorange", "gold3", "yellowgreen", "darkgreen", "royalblue", "slateblue4", "mediumpurple", "orchid", "plum", "black"), name = "Method") +
labs(x = "Region", y = "EJ per PKcal", title = "Average food processing energy use coefficient, historical and estimated data with various methods") +
theme_bw() +
theme(text = element_text(size = 24),
strip.background = element_blank(),
axis.text.x = element_text(angle=90, vjust = 1, hjust = 1))
ggsave(paste0(fig_dir, "/coefs_avg_infilled_test_methods_2.png"), fig, width = 35, height = 20)
# plot 2015 values
hist_infilled_test_methods_gcamdata_coef_sel <- hist_gcamdata_avg_coef %>%
mutate(label = "historical") %>%
dplyr::select(GCAM_region_ID, region, year, label, EJ_per_PKcal) %>%
rbind(infilled_test_methods_gcamdata_avg_coef %>%
dplyr::select(GCAM_region_ID, region, year, label, EJ_per_PKcal = EJ_per_PKcal_est))
hist_infilled_test_methods_gcamdata_2015_coef_sel <- hist_infilled_test_methods_gcamdata_coef_sel %>%
filter(year == 2015)
plot_ly(width = 1000) %>%
add_trace(data = hist_gcamdata_avg_coef %>%
filter(year == 2015) %>%
mutate(label = "historical"),
x = ~label,
y = ~EJ_per_PKcal,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(line = list(color = I("black"), width = 1),
symbol = "circle")) %>%
add_trace(data = infilled_test_methods_gcamdata_avg_coef %>%
filter(year == 2015),
x = ~label,
y = ~EJ_per_PKcal_est,
color = ~region,
type = "scatter",
mode = "markers",
name = ~paste0(region, " estimated")) %>%
layout(xaxis = list(title = "Method"),
yaxis = list(title = "EJ per PKcal"),
title = "2015 food processing energy use coefficient, historical and estimated data with various methods,\nhistorical outlined in black")
plot_ly(width = 1000) %>%
add_trace(data = hist_gcamdata_avg_coef %>%
filter(year == 2015) %>%
mutate(label = "historical"),
x = ~region,
y = ~EJ_per_PKcal,
color = ~label,
colors = c("saddlebrown", "sandybrown", "red", "darkorange", "gold3", "yellowgreen", "darkgreen", "royalblue", "slateblue4", "mediumpurple", "orchid", "plum", "black"),
type = "scatter",
mode = "markers",
marker = list(size = 7)) %>%
add_trace(data = infilled_test_methods_gcamdata_avg_coef %>%
filter(year == 2015),
x = ~region,
y = ~EJ_per_PKcal_est,
color = ~label,
type = "scatter",
mode = "markers",
marker = list(size = 5)) %>%
layout(xaxis = list(title = ""),
yaxis = list(title = "EJ per PKcal"),
title = "2015 food processing energy use coefficient, historical and estimated data with various methods")
fig <- ggplot(hist_infilled_test_methods_gcamdata_2015_coef_sel,
aes(x = label, y = EJ_per_PKcal)) +
geom_point(size = 4) +
facet_wrap(~region, ncol = 6) +
labs(x = "Method", y = "EJ per PKcal", title = "2015 food processing energy use coefficient, historical and estimated data with various methods") +
theme_bw() +
theme(text = element_text(size = 24),
strip.background = element_blank(),
axis.text.x = element_text(angle=90, vjust = 1, hjust = 1))
ggsave(paste0(fig_dir, "/coefs_2015_infilled_test_methods_1.png"), fig, width = 35, height = 20)
fig <- ggplot(hist_infilled_test_methods_gcamdata_2015_coef_sel,
aes(x = region, y = EJ_per_PKcal, color = label)) +
geom_point(size = 4) +
scale_color_manual(values = c("saddlebrown", "sandybrown", "red", "darkorange", "gold3", "yellowgreen", "darkgreen", "royalblue", "slateblue4", "mediumpurple", "orchid", "plum", "black"), name = "Method") +
labs(x = "Region", y = "EJ per PKcal", title = "2015 food processing energy use coefficient, historical and estimated data with various methods") +
theme_bw() +
theme(text = element_text(size = 24),
strip.background = element_blank(),
axis.text.x = element_text(angle=90, vjust = 1, hjust = 1))
ggsave(paste0(fig_dir, "/coefs_2015_infilled_test_methods_2.png"), fig, width = 35, height = 20)
# plot 2010-2015 values, just for selected methods
fig <- ggplot(hist_infilled_test_methods_gcamdata_coef_sel %>% filter(year >= 2010 &
year <= 2015 &
label %in% c("historical", "energy_est_nonstaples_1",
"energy_est_nonstaples_4",
"energy_est_nonstaples_5",
"energy_est_nonstaples_6")),
aes(x = region, y = EJ_per_PKcal, color = label)) +
geom_point(size = 4) +
scale_color_manual(values = c("red3", "darkgreen", "royalblue", "mediumpurple", "black"), name = "Method") +
labs(x = "Region", y = "EJ per PKcal", title = "2015 food processing energy use coefficient, historical and estimated data with various methods") +
facet_wrap(~year, nrow = 2) +
theme_bw() +
theme(text = element_text(size = 24),
strip.background = element_blank(),
axis.text.x = element_text(angle=90, vjust = 0, hjust = 1))
ggsave(paste0(fig_dir, "/coefs_2010_2015_infilled_test_methods_sel.png"), fig, width = 35, height = 20)
From these plots, and the summaries of the various models, it seems that the model with nonstaples, GDP, and regional fixed effects fits the data best and generates reasonable predictions for infilling data for regions with missing data. Looking at the 2015 values specifically, the coefficients of predicted EJ per total PKcal consumed seem reasonable, and if anything likely to be underestimates for most of the regions that need infilled data, which is the more conservative path to take anyway (i.e., if anything, pulling out slightly too little energy from other industrial energy use - that seems like a better “worst case” than pulling out too much and overestimating food processing energy use).
First output (again) the information for this relationship.
# print final relationship and get values
m_final <- lm(EJ ~ nonstaples + GDP_mil90USD + region, comb_data_sel_GCAM_reg_gcamdata_final)
# print(summary(m_final))
m_final_data <- get_lm_data(m_final)
##
## Call:
## lm(formula = EJ ~ nonstaples + GDP_mil90USD + region, data = comb_data_sel_GCAM_reg_gcamdata_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.46273 -0.01677 0.00076 0.02120 0.26115
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.672e-02 9.006e-02 0.519 0.60417
## nonstaples 1.519e+00 1.525e-01 9.958 < 2e-16
## GDP_mil90USD 9.774e-08 9.609e-09 10.171 < 2e-16
## regionAustralia_NZ -2.453e-04 9.189e-02 -0.003 0.99787
## regionBrazil 3.701e-01 9.206e-02 4.021 6.9e-05
## regionCanada -1.335e-01 9.435e-02 -1.415 0.15792
## regionCentral Asia -9.359e-02 1.103e-01 -0.849 0.39653
## regionChina 1.350e-01 1.090e-01 1.239 0.21622
## regionColombia -3.506e-02 9.173e-02 -0.382 0.70247
## regionEU-12 9.742e-03 9.179e-02 0.106 0.91553
## regionEU-15 -3.552e-01 1.118e-01 -3.176 0.00160
## regionEurope_Eastern -5.563e-03 9.370e-02 -0.059 0.95269
## regionEurope_Non_EU -8.889e-02 9.174e-02 -0.969 0.33315
## regionEuropean Free Trade Association -8.660e-02 9.188e-02 -0.943 0.34646
## regionJapan -2.567e-01 9.614e-02 -2.670 0.00788
## regionMexico -8.621e-02 9.175e-02 -0.940 0.34798
## regionRussia 1.275e-01 9.196e-02 1.386 0.16636
## regionSouth America_Northern -6.324e-02 9.181e-02 -0.689 0.49136
## regionSouth Korea -6.009e-02 9.178e-02 -0.655 0.51299
## regionSoutheast Asia 6.075e-02 9.243e-02 0.657 0.51135
## regionTaiwan -6.300e-02 9.174e-02 -0.687 0.49265
## regionUSA -2.449e-01 1.104e-01 -2.217 0.02713
##
## (Intercept)
## nonstaples ***
## GDP_mil90USD ***
## regionAustralia_NZ
## regionBrazil ***
## regionCanada
## regionCentral Asia
## regionChina
## regionColombia
## regionEU-12
## regionEU-15 **
## regionEurope_Eastern
## regionEurope_Non_EU
## regionEuropean Free Trade Association
## regionJapan **
## regionMexico
## regionRussia
## regionSouth America_Northern
## regionSouth Korea
## regionSoutheast Asia
## regionTaiwan
## regionUSA *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09001 on 414 degrees of freedom
## Multiple R-squared: 0.9544, Adjusted R-squared: 0.9521
## F-statistic: 412.8 on 21 and 414 DF, p-value: < 2.2e-16
nonstaples_slope_final <- (m_final_data[[1]] %>% filter(label == "nonstaples"))$coef[1]
GDP_slope_final <- (m_final_data[[1]] %>% filter(label == "GDP_mil90USD"))$coef[1]
overall_intercept_final <- (m_final_data[[1]] %>% filter(label == "(Intercept)"))$coef[1]
overall_intercept_sig_final <- (m_final_data[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]
region_intercepts_final <- m_final_data[[2]] %>% dplyr::select(region, intercept, intercept_if_sig)
# apply model
hist_and_infilled_final_gcamdata <- comb_data_sel_GCAM_reg_gcamdata_to_infill %>%
mutate(orig_data = FALSE) %>%
rbind(comb_data_sel_GCAM_reg_gcamdata_final %>% mutate(orig_data = TRUE)) %>%
left_join(region_intercepts_final) %>%
mutate(across(contains("intercept"), ~ replace_na(.x, 0))) %>%
mutate(EJ_est = nonstaples * nonstaples_slope_final + GDP_mil90USD * GDP_slope_final + overall_intercept_final + intercept,
EJ_per_PKcal = EJ / PKcal,
EJ_per_PKcal_est = EJ_est / PKcal)
Using this relationship, plot the infilled data.
plot_ly(width = 1000) %>%
add_trace(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == TRUE & year >= 1990),
x = ~PKcal,
y = ~EJ,
color = ~region,
type = "scatter",
mode = "markers",
legendgroup = ~region,
text = ~year) %>%
add_trace(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == FALSE & year >= 1990),
x = ~PKcal,
y = ~EJ_est,
color = ~region,
type = "scatter",
mode = "markers",
name = ~paste0(region, " estimated"),
marker = list(symbol = "cross"),
legendgroup = ~region,
text = ~year) %>%
layout(xaxis = list(title = "PKcal"),
yaxis = list(title = "EJ"),
title = "Food processing energy use vs food consumption, 1990-2015\nhistorical and infilled data")
# plot historical data for regions even with not good data
plot_ly(width = 1000) %>%
add_trace(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == TRUE & year >= 1990),
x = ~PKcal,
y = ~EJ,
color = ~region,
type = "scatter",
mode = "markers",
legendgroup = ~region,
text = ~year) %>%
add_trace(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == FALSE & year >= 1990),
x = ~PKcal,
y = ~EJ_est,
color = ~region,
type = "scatter",
mode = "markers",
name = ~paste0(region, " estimated"),
marker = list(symbol = "cross"),
legendgroup = ~region,
text = ~year) %>%
add_trace(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == FALSE & year >= 1990),
x = ~PKcal,
y = ~EJ,
color = ~region,
type = "scatter",
mode = "markers",
name = ~paste0(region, " original, excluded"),
marker = list(symbol = "square",
line = list(color = I("gray"), width = 1)),
legendgroup = ~region,
text = ~year) %>%
layout(xaxis = list(title = "PKcal"),
yaxis = list(title = "EJ"),
title = "Food processing energy use vs food consumption, 1990-2015\nhistorical and infilled data, showing excluded original data")
fig <- ggplot() +
geom_point(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == TRUE & year >= 1990),
aes(x = PKcal, y = EJ, color = region, shape = I(19))) +
geom_point(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == FALSE & year >= 1990),
aes(x = PKcal, y = EJ_est, color = region, shape = I(3))) +
scale_color_manual(values = palette36, name = "Region") +
labs(x = "PKcal", y = "EJ", title = "Food processing energy use vs food consumption, 1990-2015\nhistorical (circles) and infilled (crosses) data") +
theme_bw() +
theme(text = element_text(size = 12),
strip.background = element_blank())
ggsave(paste0(fig_dir, "/food_pro_EJ_PKcal_hist_infill_final.png"), fig, width = 15, height = 8)
fig <- ggplot() +
geom_point(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == TRUE & year >= 1990),
aes(x = PKcal, y = EJ, color = I("black"), size = I(3))) +
geom_point(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == FALSE & year >= 1990),
aes(x = PKcal, y = EJ_est, color = I("royalblue"), size = I(3))) +
geom_point(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == FALSE & year >= 1990),
aes(x = PKcal, y = EJ, color = I("mediumpurple1"), size = I(3))) +
facet_wrap(~region, nrow = 6, scales = "free") +
labs(x = "PKcal", y = "EJ", title = "Food processing energy use vs food consumption, 1990-2015\nhistorical data (black for regions where data are used, purple for excluded) and infilled data (blue)") +
theme_bw() +
theme(text = element_text(size = 24),
strip.background = element_blank())
ggsave(paste0(fig_dir, "/food_pro_EJ_PKcal_hist_infill_final_sep_regions.png"), fig, width = 35, height = 30)
# coefficients time series
plot_ly(width = 1000) %>%
add_trace(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == TRUE & year >= 1990),
x = ~year,
y = ~EJ_per_PKcal,
color = ~region,
type = "scatter",
mode = "markers",
legendgroup = ~region,
text = ~year) %>%
add_trace(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == FALSE & year >= 1990),
x = ~year,
y = ~EJ_per_PKcal_est,
color = ~region,
type = "scatter",
mode = "markers",
name = ~paste0(region, " estimated"),
marker = list(symbol = "cross"),
legendgroup = ~region,
text = ~year) %>%
add_trace(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == FALSE & year >= 1990),
x = ~year,
y = ~EJ_per_PKcal,
color = ~region,
type = "scatter",
mode = "markers",
name = ~paste0(region, " original, excluded"),
marker = list(symbol = "square",
line = list(color = I("gray"), width = 1)),
legendgroup = ~region,
text = ~year) %>%
layout(xaxis = list(title = "Year"),
yaxis = list(title = "EJ per PKcal"),
title = "Coefficient of food processing energy use per calorie, 1990-2015\nhistorical and infilled data, showing excluded original data")
fig <- ggplot() +
geom_point(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == TRUE & year >= 2010),
aes(x = region, y = EJ_per_PKcal, color = I("black"))) +
geom_point(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == FALSE & year >= 2010),
aes(x = region, y = EJ_per_PKcal, color = I("mediumpurple1"))) +
geom_point(data = hist_and_infilled_final_gcamdata %>% filter(orig_data == FALSE & year >= 2010),
aes(x = region, y = EJ_per_PKcal_est, color = I("royalblue"))) +
facet_wrap(~year, nrow = 2) +
labs(x = "", y = "EJ per PKcal", title = "Coefficient of food processing energy use per calorie consumed, 1990-2015\nhistorical data (black for regions where data are used, purple for excluded) and infilled data (blue)") +
theme_bw() +
theme(text = element_text(size = 18),
strip.background = element_blank(),
axis.text.x = element_text(angle=90, vjust = 0, hjust = 1))
ggsave(paste0(fig_dir, "/coef_2010_2015_food_pro_EJ_PKcal_hist_infill_final.png"), fig, width = 20, height = 12)
# calculate the new shares of food processing energy use of total industry energy use with these infilled values
new_foodpro_frac <- IEA_ind_sectors_GCAM_region_total_en %>%
group_by(year, GCAM_region_ID) %>%
summarize(total = sum(value)) %>%
right_join(hist_and_infilled_final_gcamdata %>%
filter(orig_data == FALSE) %>%
dplyr::select(year, GCAM_region_ID, EJ_est)) %>%
mutate(frac = EJ_est / total)
When infilling in gcamdata, we will only infill for regions that fit the criteria to exclude as defined previously and also have energy per calorie coefficients below the minimum “reasonable” value from the data deemed higher quality and used to fit these relationships. That value is printed below.
print(min((hist_and_infilled_final_gcamdata %>% filter(orig_data == TRUE))$EJ_per_PKcal))
## [1] 0.4130662
Print all of the values that will be used in gcamdata.
print(paste0("Coefficient of nonstaples consumption (EJ per PKcal): ", round(nonstaples_slope_final, 4)))
## [1] "Coefficient of nonstaples consumption (EJ per PKcal): 1.519"
print(paste0("Coefficient of GDP (EJ per million 1990 USD): ", round(GDP_slope_final, 12)))
## [1] "Coefficient of GDP (EJ per million 1990 USD): 9.7741e-08"
print(paste0("Overall intercept (EJ): ", round(overall_intercept_final, 4)))
## [1] "Overall intercept (EJ): 0.0467"
print("Regional intercepts (EJ):")
## [1] "Regional intercepts (EJ):"
print(region_intercepts_final %>%
dplyr::select(region, intercept) %>%
mutate(intercept = round(intercept, 4)))
## region intercept
## 1 Australia_NZ -0.0002
## 2 Brazil 0.3701
## 3 Canada -0.1335
## 4 Central Asia -0.0936
## 5 China 0.1350
## 6 Colombia -0.0351
## 7 EU-12 0.0097
## 8 EU-15 -0.3552
## 9 Europe_Eastern -0.0056
## 10 Europe_Non_EU -0.0889
## 11 European Free Trade Association -0.0866
## 12 Japan -0.2567
## 13 Mexico -0.0862
## 14 Russia 0.1275
## 15 South America_Northern -0.0632
## 16 South Korea -0.0601
## 17 Southeast Asia 0.0608
## 18 Taiwan -0.0630
## 19 USA -0.2449
print("Combined regional intercepts, using the overall intercept for regions without regional intercepts, and adding the overall intercept to the specific regional intercepts (so now only one value needs to be added for all regions):")
## [1] "Combined regional intercepts, using the overall intercept for regions without regional intercepts, and adding the overall intercept to the specific regional intercepts (so now only one value needs to be added for all regions):"
region_intercepts_final_to_save <- GCAM_region_names %>%
left_join(region_intercepts_final %>%
dplyr::select(region, intercept)) %>%
mutate(intercept = round(replace_na(intercept, 0) + overall_intercept_final, 4),
units = "EJ")
kable(region_intercepts_final_to_save)
| GCAM_region_ID | region | intercept | units |
|---|---|---|---|
| 1 | USA | -0.1982 | EJ |
| 2 | Africa_Eastern | 0.0467 | EJ |
| 3 | Africa_Northern | 0.0467 | EJ |
| 4 | Africa_Southern | 0.0467 | EJ |
| 5 | Africa_Western | 0.0467 | EJ |
| 6 | Australia_NZ | 0.0465 | EJ |
| 7 | Brazil | 0.4168 | EJ |
| 8 | Canada | -0.0867 | EJ |
| 9 | Central America and Caribbean | 0.0467 | EJ |
| 10 | Central Asia | -0.0469 | EJ |
| 11 | China | 0.1817 | EJ |
| 12 | EU-12 | 0.0565 | EJ |
| 13 | EU-15 | -0.3085 | EJ |
| 14 | Europe_Eastern | 0.0412 | EJ |
| 15 | Europe_Non_EU | -0.0422 | EJ |
| 16 | European Free Trade Association | -0.0399 | EJ |
| 17 | India | 0.0467 | EJ |
| 18 | Indonesia | 0.0467 | EJ |
| 19 | Japan | -0.2100 | EJ |
| 20 | Mexico | -0.0395 | EJ |
| 21 | Middle East | 0.0467 | EJ |
| 22 | Pakistan | 0.0467 | EJ |
| 23 | Russia | 0.1742 | EJ |
| 24 | South Africa | 0.0467 | EJ |
| 25 | South America_Northern | -0.0165 | EJ |
| 26 | South America_Southern | 0.0467 | EJ |
| 27 | South Asia | 0.0467 | EJ |
| 28 | South Korea | -0.0134 | EJ |
| 29 | Southeast Asia | 0.1075 | EJ |
| 30 | Taiwan | -0.0163 | EJ |
| 31 | Argentina | 0.0467 | EJ |
| 32 | Colombia | 0.0117 | EJ |
write_csv(region_intercepts_final_to_save, paste0(fig_dir, "/EJ_nonstaples_GDP_lm_intercepts_regional.csv"))
write_csv(data.frame(variable = c("nonstaples calories", "GDP"),
coefficient = c(round(nonstaples_slope_final, 4), round(GDP_slope_final, 12)),
units = c("EJ per PKcal", "EJ per million 1990 USD")),
paste0(fig_dir, "/EJ_nonstaples_GDP_lm_coefs.csv"))
Plotting figures showing the original food processing energy use, added energy for some regions and years, and final food processing energy use.
# Recalculate food processing energy use - add the infilled values to food processing energy use
L1328.in_EJ_R_food_F_Yh_recal %>%
full_join(L1328.EJ_R_food_F_Yh_est_to_infill_adders %>% select(GCAM_region_ID, fuel, year, infill_fuel),
by = c("GCAM_region_ID", "fuel", "year")) %>%
mutate(value = replace_na(value, 0),
infill_fuel = replace_na(infill_fuel, 0),
new_value = value + infill_fuel) %>%
left_join(GCAM_region_names) ->
L1328.in_EJ_R_food_F_Yh_recal_2_full
L1328.in_EJ_R_food_F_Yh_recal_2_full_longer <- L1328.in_EJ_R_food_F_Yh_recal_2_full %>%
rename(original = value, updated = new_value, infill = infill_fuel) %>%
pivot_longer(cols = c(original, infill, updated), names_to = "value_type", values_to = "value") %>%
mutate(value_type = factor(value_type, levels = c("original", "infill", "updated")))
# Calculate the coefficients of food processing energy use per calorie consumed
L1328.in_EJ_R_food_Yh_recal_2_full_longer <- L1328.in_EJ_R_food_F_Yh_recal_2_full_longer %>%
# summarize to total energy
group_by(GCAM_region_ID, region, year, sector, value_type) %>%
summarize(value = sum(value)) %>%
ungroup()
L1328.in_EJ_R_food_Yh_recal_2_full_longer_w_coefs <- L1328.in_EJ_R_food_Yh_recal_2_full_longer %>%
filter(value_type != "infill") %>%
rename(EJ = value) %>%
left_join(L1328.out_Pcal_R_food_Yh %>% rename(Pcal = value)) %>%
mutate(EJ_per_Pcal = EJ / Pcal,
EJ_per_PKcal = EJ_per_Pcal * 1000)
# Recalculate other industry energy use - subtract off the infilled values allocated to food processing
L1328.in_EJ_R_indenergy_F_Yh_tmp_2 %>%
left_join(L1328.EJ_R_food_F_Yh_est_to_infill_adders %>% select(GCAM_region_ID, fuel, year, infill_fuel),
by = c("GCAM_region_ID", "fuel", "year")) %>%
mutate(infill_fuel = replace_na(infill_fuel, 0),
new_value = value - infill_fuel) %>%
left_join(GCAM_region_names) ->
L1328.in_EJ_R_indenergy_F_Yh_full
L1328.in_EJ_R_indenergy_F_Yh_full_longer <- L1328.in_EJ_R_indenergy_F_Yh_full %>%
rename(original = value, updated = new_value, removed = infill_fuel) %>%
pivot_longer(cols = c(original, removed, updated), names_to = "value_type", values_to = "value") %>%
mutate(value_type = factor(value_type, levels = c("original", "removed", "updated")))
# plot
fig <- ggplot(L1328.in_EJ_R_food_F_Yh_recal_2_full_longer %>% filter(year >= 1990),
aes(x = year, y = value, fill = fuel)) +
geom_col() +
facet_grid(rows = vars(region), cols = vars(value_type), scales = "free_y") +
scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) +
labs(x = "", y = "EJ", title = "Food processing energy use, before and after infilling") +
theme(axis.text.x = element_text(size=5)) +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
ggsave(paste0(fig_dir, "/foodpro_energy_use_by_GCAM_region_fuel_hist_pre_post_infill_gcamdata.png"), fig, width = 15, height = 35)
fig <- ggplot(L1328.in_EJ_R_indenergy_F_Yh_full_longer %>% filter(year >= 1990),
aes(x = year, y = value, fill = fuel)) +
geom_col() +
facet_grid(rows = vars(region), cols = vars(value_type), scales = "free_y") +
scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) +
labs(x = "", y = "EJ", title = "Other industry energy use, before and after infilling") +
theme(axis.text.x = element_text(size=5)) +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
ggsave(paste0(fig_dir, "/other_industry_energy_use_by_GCAM_region_fuel_hist_pre_post_infill_gcamdata.png"), fig, width = 15, height = 35)
# plot just 2015 before and after infilling for each region
fig <- ggplot(L1328.in_EJ_R_food_F_Yh_recal_2_full_longer %>%
filter(year == 2015),
aes(x = value_type, y = value, fill = fuel)) +
geom_col() +
facet_wrap(~region, nrow = 8, scales = "free") +
scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) +
labs(x = "", y = "EJ", title = "Food processing energy use in 2015, before and after infilling") +
theme_classic()
fig
ggsave(paste0(fig_dir, "/foodpro_energy_use_by_GCAM_region_fuel_hist_pre_post_infill_gcamdata_2015.png"), fig, width = 15, height = 35)
fig <- ggplot(L1328.in_EJ_R_indenergy_F_Yh_full_longer %>%
filter(year == 2015),
aes(x = value_type, y = value, fill = fuel)) +
geom_col() +
facet_wrap(~region, nrow = 8, scales = "free") +
scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) +
labs(x = "", y = "EJ", title = "Other industry energy use in 2015, before and after infilling") +
theme_classic()
fig
ggsave(paste0(fig_dir, "/other_industry_energy_use_by_GCAM_region_fuel_hist_pre_post_infill_gcamdata_2015.png"), fig, width = 15, height = 35)
# plot just a few selected regions
fig <- ggplot(L1328.in_EJ_R_food_F_Yh_recal_2_full_longer %>% filter(year >= 1990 & value_type != "infill" & region %in% c("China", "Southeast Asia", "India", "Pakistan")) %>%
mutate(region = factor(region, levels = c("China", "Southeast Asia", "India", "Pakistan"))),
aes(x = year, y = value, fill = fuel)) +
geom_col() +
facet_grid(rows = vars(region), cols = vars(value_type), scales = "free_y") +
scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) +
labs(x = "", y = "EJ", title = "Food processing energy use, before and after infilling") +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic() +
theme(text = element_text(size = 20))
fig
ggsave(paste0(fig_dir, "/foodpro_energy_use_by_GCAM_region_fuel_hist_pre_post_infill_gcamdata_sel.png"), fig, width = 10, height = 10)
# plot just regions that had infilling happen
regions_sel_infilled <- L1328.in_EJ_R_food_F_Yh_recal_2_full_longer %>%
filter(value_type == "infill" & year >= 1990) %>%
group_by(region) %>%
filter(!all(value == 0)) %>%
ungroup() %>%
pull(region) %>%
unique()
fig <- ggplot(L1328.in_EJ_R_food_F_Yh_recal_2_full_longer %>% filter(year >= 1990 & region %in% regions_sel_infilled),
aes(x = year, y = value, fill = fuel)) +
geom_col() +
facet_grid(rows = vars(region), cols = vars(value_type), scales = "free_y") +
scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) +
labs(x = "", y = "EJ", title = "Food processing energy use, before and after infilling, for regions where infilling is performed") +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic() +
theme(text = element_text(size = 25))
ggsave(paste0(fig_dir, "/foodpro_energy_use_by_GCAM_region_fuel_hist_pre_post_infill_gcamdata_sel_infilled.png"), fig, width = 20, height = 30)
# plot the energy per calorie coefficients
L1328.in_EJ_R_food_Yh_recal_2_full_coefs <- L1328.in_EJ_R_food_Yh_recal_2_full_longer_w_coefs %>%
dplyr::select(GCAM_region_ID, region, year, sector, value_type, EJ_per_PKcal) %>%
pivot_wider(names_from = "value_type", values_from = "EJ_per_PKcal") %>%
mutate(Units = "EJ per PKcal")
L1328.in_EJ_R_food_Yh_recal_2_full_longer_w_coefs_sel <- L1328.in_EJ_R_food_Yh_recal_2_full_coefs %>%
mutate(updated = ifelse(updated == original, NA, updated)) %>%
pivot_longer(c(original, updated), names_to = "value_type", values_to = "EJ_per_PKcal") %>%
filter(!is.na(EJ_per_PKcal))
fig <- ggplot(L1328.in_EJ_R_food_Yh_recal_2_full_longer_w_coefs %>%
filter(year >= 1990),
aes(x = year, y = EJ_per_PKcal, color = value_type)) +
geom_point(size = 2) +
facet_wrap(~region, nrow = 8) +
scale_color_manual(values = c("darkgreen", "mediumpurple"), name = "") +
labs(x = "", y = "EJ per PKcal", title = "Food processing energy use coefficients, before and after infilling") +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
ggsave(paste0(fig_dir, "/foodpro_energy_use_coef_by_GCAM_region_fuel_hist_pre_post_infill_gcamdata.png"), fig, width = 15, height = 35)
fig <- ggplot(L1328.in_EJ_R_food_Yh_recal_2_full_longer_w_coefs_sel %>%
filter(year >= 1990),
aes(x = year, y = EJ_per_PKcal, color = value_type)) +
geom_point(size = 2) +
facet_wrap(~region, nrow = 8) +
scale_color_manual(values = c("skyblue", "mediumpurple"), name = "") +
labs(x = "", y = "EJ per PKcal", title = "Food processing energy use coefficients, before and after infilling") +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
ggsave(paste0(fig_dir, "/foodpro_energy_use_coef_by_GCAM_region_fuel_hist_pre_post_infill_gcamdata_2.png"), fig, width = 15, height = 35)
fig <- ggplot(L1328.in_EJ_R_food_Yh_recal_2_full_longer_w_coefs %>%
filter(year == 2015) %>%
mutate(region = factor(region, levels = unique((L1328.in_EJ_R_food_Yh_recal_2_full_coefs %>% filter(year == 2015) %>% mutate(unchanged = original == updated) %>% arrange(desc(unchanged)))$region))),
aes(x = region, y = EJ_per_PKcal, fill = value_type)) +
geom_col(position = "dodge") +
scale_fill_manual(values = c("#00931d", "deepskyblue1"), name = "") +
labs(x = "", y = "EJ per PKcal", title = "Food processing energy use coefficients, before and after infilling, in 2015") +
theme_classic() +
theme(axis.text.x = element_text(angle = 55, vjust = 1, hjust = 1),
text = element_text(size = 20))
fig
ggsave(paste0(fig_dir, "/foodpro_energy_use_coef_by_GCAM_region_fuel_hist_pre_post_infill_gcamdata_2015.png"), fig, width = 12, height = 8)
This is done just for comparison, as this was the original methodology. However, for consistency in data–since the coefficient of food processing energy use per calorie will actually be applied to the gcamdata processed food consumption data–the relationships derived using the gcamdata processed food consumption data will be used in the breakout.
First join data.
# first join the food processing energy use data with the food production data
comb_data_all <- IEA_foodpro_region_total_en %>%
dplyr::select(iso, country_name, GCAM_region_ID, year, EJ = value) %>%
full_join(total_cal_food_kt_all_years %>%
dplyr::select(iso, year, GCAM_region_ID, Food_Kt, Food_Kt_w_calorie, MKcal, calperg_food_w_calorie)) %>%
full_join(GDP_per_cap_hist_calc_FAO %>%
dplyr::select(iso, year, GCAM_region_ID, GDP_pcap_thous2015USD_FAO, pop, GDP_mil2015USD_FAO)) %>%
left_join(total_feed_kt_all_years %>%
dplyr::select(iso, year, GCAM_region_ID, Feed_Kt)) %>%
filter(year %in% all_IEA_years & !is.na(EJ)) %>%
arrange(country_name, year) %>%
# convert some units
mutate(PKcal = MKcal / (10^9))
# select just regions that do not have zero food processing energy use in all years, and also do not have no food production in all years
comb_data_sel <- comb_data_all %>%
group_by(iso) %>%
filter(!all(EJ == 0) & !all(is.na(MKcal) | MKcal == 0)) %>%
ungroup()
# repeat for the food data with category of food -- add the food production by category and commodity to the rest of the data
comb_data_all_cats <- comb_data_all %>%
left_join(total_cal_food_kt_all_years_cats %>%
# convert some units
mutate(PKcal = MKcal / (10^9)) %>%
dplyr::select(iso, year, GCAM_region_ID, commodity_type_broad, PKcal) %>%
pivot_wider(names_from = commodity_type_broad, values_from = PKcal)) %>%
mutate(nonstaples = nonstaples_animal + nonstaples_other,
staples_frac = staples / PKcal,
nonstaples_frac = nonstaples / PKcal,
EJ_per_PKcal = EJ / PKcal,
EJ_pcap = EJ / pop) %>%
left_join(total_cal_food_kt_all_years_commodity %>%
# convert some units
mutate(PKcal = MKcal / (10^9)) %>%
dplyr::select(iso, year, GCAM_region_ID, GCAM_commodity, PKcal) %>%
pivot_wider(names_from = GCAM_commodity, values_from = PKcal))
# select just regions that do not have zero food processing energy use in all years, and also do not have no food production in all years
comb_data_sel_cats <- comb_data_all_cats %>%
group_by(iso) %>%
filter(!all(EJ == 0) & !all(is.na(MKcal) | MKcal == 0)) %>%
ungroup()
Now filter data, using the same criteria, and calculate relationships.
IEA_ind_sectors_region_total_en_frac_wider <- IEA_ind_sectors_region_total_en %>%
dplyr::select(-value) %>%
pivot_wider(names_from = FLOW, values_from = frac)
# get regions and years to include, satisfying our criteria established earlier
country_years_incl <- IEA_ind_sectors_region_total_en_frac_wider %>%
# select only desired years in which INONSPEC fraction is not too high and food processing fraction is not too low, or in which food processing fraction is high even if INONSPEC fraction is high
filter(year >= min_year & ((INONSPEC < max_frac_inonspec & (!is.na(FOODPRO) & FOODPRO > min_frac_foodpro)) | FOODPRO > override_frac_foodpro)) %>%
dplyr::select(iso, country_name, GCAM_region_ID, year)
fig <- ggplot(IEA_ind_sectors_region_fuel %>% filter(FLOW == "FOODPRO") %>% right_join(country_years_incl),
aes(x = year, y = value, fill = fuel)) +
geom_col() +
facet_wrap(~country_name, scale = "free_y") +
scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) +
labs(x = "", y = "EJ", title = "Food processing energy use (IEA)") +
theme(axis.text.x = element_text(size=5)) +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
ggsave(paste0(fig_dir, "/foodpro_energy_use_by_country_fuel_hist_IEA_sel.png"), fig, width = 35, height = 20)
# filter the food and energy data
comb_data_sel_cats_final <- comb_data_sel_cats %>%
right_join(country_years_incl) %>%
filter(!is.na(MKcal),
MKcal > 0)
# get data to infill
comb_data_all_cats_infill <- comb_data_all_cats %>%
anti_join(country_years_incl)
# calculate relationships
# just going to infill from the minimum year onwards for now
hist_and_infilled_test_methods_country <- run_models_calc_infill(comb_data_sel_cats_final %>% rename(region = country_name, GDP = GDP_mil2015USD_FAO),
comb_data_all_cats_infill %>% filter(year >= min_year) %>%
rename(region = country_name, GDP = GDP_mil2015USD_FAO)) %>%
rename(country_name = region, GDP_mil2015USD_FAO = GDP)
##
## Call:
## lm(formula = EJ ~ 0 + PKcal, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.74877 -0.00110 0.00436 0.01927 0.80324
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 1.03354 0.01518 68.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1239 on 1560 degrees of freedom
## Multiple R-squared: 0.7483, Adjusted R-squared: 0.7482
## F-statistic: 4638 on 1 and 1560 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ PKcal, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71065 -0.02981 -0.02397 -0.00848 0.79290
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.029077 0.003228 9.007 <2e-16 ***
## PKcal 0.988429 0.015625 63.260 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1208 on 1559 degrees of freedom
## Multiple R-squared: 0.7196, Adjusted R-squared: 0.7195
## F-statistic: 4002 on 1 and 1559 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ 0 + PKcal + region, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64540 -0.00386 -0.00007 0.00287 0.35023
##
## Coefficients:
## Estimate Std. Error t value
## PKcal 2.4655702 0.0702549 35.095
## regionAlbania -0.0072761 0.0139865 -0.520
## regionAlgeria -0.0973940 0.0220438 -4.418
## regionArgentina -0.0063869 0.0577711 -0.111
## regionArmenia -0.0042379 0.0407703 -0.104
## regionAustralia 0.0536709 0.0114680 4.680
## regionAustria -0.0136003 0.0113507 -1.198
## regionAzerbaijan -0.0178580 0.0160087 -1.116
## regionBelarus 0.0018460 0.0113359 0.163
## regionBelgium -0.0029741 0.0113717 -0.262
## regionBenin -0.0211867 0.0173949 -1.218
## regionBosnia and Herzegovina -0.0088659 0.0203869 -0.435
## regionBotswana -0.0032479 0.0135904 -0.239
## regionBrazil 0.1504994 0.0189928 7.924
## regionBulgaria -0.0105671 0.0113274 -0.933
## regionCameroon -0.0498435 0.0204354 -2.439
## regionCanada -0.0677119 0.0177510 -3.815
## regionChina -2.3957724 0.1029992 -23.260
## regionColombia -0.0542326 0.0117267 -4.625
## regionCongo -0.0077665 0.0288296 -0.269
## regionCosta Rica 0.0035419 0.0113125 0.313
## regionCote dIvoire -0.0458530 0.0167130 -2.744
## regionCroatia -0.0030947 0.0113135 -0.274
## regionCyprus -0.0020856 0.0115317 -0.181
## regionCzech Republic -0.0074207 0.0123351 -0.602
## regionDenmark 0.0105927 0.0113210 0.936
## regionDominican Republic -0.0048202 0.0113234 -0.426
## regionEstonia 0.0001313 0.0117698 0.011
## regionFinland -0.0016292 0.0113178 -0.144
## regionFrance -0.0521288 0.0133009 -3.919
## regionGeorgia -0.0112524 0.0144185 -0.780
## regionGermany -0.0791826 0.0138329 -5.724
## regionGreece -0.0210734 0.0113762 -1.852
## regionHungary -0.0096319 0.0113469 -0.849
## regionIceland 0.0019442 0.0113075 0.172
## regionIreland 0.0049661 0.0113155 0.439
## regionIsrael -0.0207735 0.0166597 -1.247
## regionItaly -0.1173068 0.0132587 -8.848
## regionJamaica -0.0057791 0.0407701 -0.142
## regionJapan -0.1219481 0.0149626 -8.150
## regionKazakhstan -0.0200536 0.0192668 -1.041
## regionKorea, Republic of -0.0815894 0.0120981 -6.744
## regionKyrgyzstan -0.0121062 0.0332907 -0.364
## regionLatvia -0.0003774 0.0113093 -0.033
## regionLithuania -0.0013741 0.0113113 -0.121
## regionLuxembourg -0.0007518 0.0144143 -0.052
## regionMacedonia, the former Yugoslav Republic of -0.0040456 0.0113087 -0.358
## regionMalta -0.0011922 0.0235384 -0.051
## regionMexico -0.2249034 0.0145355 -15.473
## regionMoldova, Republic of -0.0055319 0.0120264 -0.460
## regionMontenegro -0.0017037 0.0173843 -0.098
## regionMorocco -0.0970869 0.0169038 -5.743
## regionMyanmar -0.1215420 0.0577769 -2.104
## regionNetherlands 0.0316258 0.0114272 2.768
## regionNew Zealand 0.0067845 0.0113147 0.600
## regionNorway 0.0001391 0.0113167 0.012
## regionPhilippines -0.1486343 0.0130215 -11.415
## regionPoland -0.0452449 0.0119420 -3.789
## regionPortugal -0.0139507 0.0113497 -1.229
## regionRomania -0.0381717 0.0119456 -3.195
## regionRussian Federation -0.0756132 0.0171203 -4.417
## regionSenegal -0.0297852 0.0174063 -1.711
## regionSerbia -0.0039329 0.0173981 -0.226
## regionSlovakia -0.0083704 0.0113173 -0.740
## regionSlovenia -0.0031142 0.0113090 -0.275
## regionSpain -0.0451155 0.0119758 -3.767
## regionSudan -0.0732998 0.0175385 -4.179
## regionSweden -0.0113478 0.0113414 -1.001
## regionSwitzerland -0.0138847 0.0113357 -1.225
## regionTaiwan -0.0390163 0.0114625 -3.404
## regionTajikistan -0.0145059 0.0139903 -1.037
## regionThailand 0.0444830 0.0123877 3.591
## regionTogo -0.0141698 0.0166491 -0.851
## regionTunisia -0.0293290 0.0174113 -1.684
## regionTurkey -0.1892793 0.0132259 -14.311
## regionUkraine -0.0700790 0.0170961 -4.099
## regionUnited Kingdom -0.0607865 0.0126442 -4.807
## regionUnited States of America -0.1417882 0.0343651 -4.126
## regionUruguay 0.0008953 0.0332894 0.027
## regionVenezuela -0.0573085 0.0115010 -4.983
## regionViet Nam -0.1384807 0.0244328 -5.668
## regionZimbabwe -0.0217735 0.0218032 -0.999
## Pr(>|t|)
## PKcal < 2e-16 ***
## regionAlbania 0.602984
## regionAlgeria 1.07e-05 ***
## regionArgentina 0.911983
## regionArmenia 0.917226
## regionAustralia 3.13e-06 ***
## regionAustria 0.231034
## regionAzerbaijan 0.264809
## regionBelarus 0.870665
## regionBelgium 0.793718
## regionBenin 0.223425
## regionBosnia and Herzegovina 0.663712
## regionBotswana 0.811150
## regionBrazil 4.49e-15 ***
## regionBulgaria 0.351035
## regionCameroon 0.014842 *
## regionCanada 0.000142 ***
## regionChina < 2e-16 ***
## regionColombia 4.08e-06 ***
## regionCongo 0.787664
## regionCosta Rica 0.754249
## regionCote dIvoire 0.006151 **
## regionCroatia 0.784476
## regionCyprus 0.856503
## regionCzech Republic 0.547535
## regionDenmark 0.349596
## regionDominican Republic 0.670402
## regionEstonia 0.991103
## regionFinland 0.885557
## regionFrance 9.29e-05 ***
## regionGeorgia 0.435273
## regionGermany 1.26e-08 ***
## regionGreece 0.064165 .
## regionHungary 0.396100
## regionIceland 0.863509
## regionIreland 0.660817
## regionIsrael 0.212619
## regionItaly < 2e-16 ***
## regionJamaica 0.887298
## regionJapan 7.66e-16 ***
## regionKazakhstan 0.298120
## regionKorea, Republic of 2.20e-11 ***
## regionKyrgyzstan 0.716170
## regionLatvia 0.973384
## regionLithuania 0.903328
## regionLuxembourg 0.958412
## regionMacedonia, the former Yugoslav Republic of 0.720584
## regionMalta 0.959613
## regionMexico < 2e-16 ***
## regionMoldova, Republic of 0.645596
## regionMontenegro 0.921945
## regionMorocco 1.12e-08 ***
## regionMyanmar 0.035578 *
## regionNetherlands 0.005718 **
## regionNew Zealand 0.548853
## regionNorway 0.990197
## regionPhilippines < 2e-16 ***
## regionPoland 0.000158 ***
## regionPortugal 0.219202
## regionRomania 0.001426 **
## regionRussian Federation 1.08e-05 ***
## regionSenegal 0.087259 .
## regionSerbia 0.821189
## regionSlovakia 0.459653
## regionSlovenia 0.783067
## regionSpain 0.000172 ***
## regionSudan 3.09e-05 ***
## regionSweden 0.317200
## regionSwitzerland 0.220820
## regionTaiwan 0.000682 ***
## regionTajikistan 0.299971
## regionThailand 0.000340 ***
## regionTogo 0.394861
## regionTunisia 0.092299 .
## regionTurkey < 2e-16 ***
## regionUkraine 4.37e-05 ***
## regionUnited Kingdom 1.68e-06 ***
## regionUnited States of America 3.90e-05 ***
## regionUruguay 0.978548
## regionVenezuela 7.00e-07 ***
## regionViet Nam 1.74e-08 ***
## regionZimbabwe 0.318133
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05766 on 1479 degrees of freedom
## Multiple R-squared: 0.9483, Adjusted R-squared: 0.9454
## F-statistic: 330.9 on 82 and 1479 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ PKcal + region, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64540 -0.00386 -0.00007 0.00287 0.35023
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -0.0072761 0.0139865 -0.520
## PKcal 2.4655702 0.0702549 35.095
## regionAlgeria -0.0901179 0.0260722 -3.456
## regionArgentina 0.0008892 0.0594236 0.015
## regionArmenia 0.0030382 0.0431012 0.070
## regionAustralia 0.0609470 0.0180584 3.375
## regionAustria -0.0063242 0.0179980 -0.351
## regionAzerbaijan -0.0105819 0.0212484 -0.498
## regionBelarus 0.0091221 0.0179914 0.507
## regionBelgium 0.0043020 0.0180079 0.239
## regionBenin -0.0139106 0.0223131 -0.623
## regionBosnia and Herzegovina -0.0015898 0.0247202 -0.064
## regionBotswana 0.0040282 0.0195002 0.207
## regionBrazil 0.1577755 0.0234120 6.739
## regionBulgaria -0.0032910 0.0179880 -0.183
## regionCameroon -0.0425674 0.0247478 -1.720
## regionCanada -0.0604358 0.0225563 -2.679
## regionChina -2.3884962 0.1036787 -23.037
## regionColombia -0.0469565 0.0182061 -2.579
## regionCongo -0.0004904 0.0320411 -0.015
## regionCosta Rica 0.0108180 0.0179837 0.602
## regionCote dIvoire -0.0385769 0.0217745 -1.772
## regionCroatia 0.0041814 0.0179838 0.233
## regionCyprus 0.0051905 0.0181262 0.286
## regionCzech Republic -0.0001446 0.0186339 -0.008
## regionDenmark 0.0178688 0.0179858 0.993
## regionDominican Republic 0.0024559 0.0179866 0.137
## regionEstonia 0.0074074 0.0182780 0.405
## regionFinland 0.0056469 0.0179848 0.314
## regionFrance -0.0448527 0.0192031 -2.336
## regionGeorgia -0.0039763 0.0200830 -0.198
## regionGermany -0.0719065 0.0195620 -3.676
## regionGreece -0.0137973 0.0180102 -0.766
## regionHungary -0.0023558 0.0179962 -0.131
## regionIceland 0.0092203 0.0179852 0.513
## regionIreland 0.0122422 0.0179842 0.681
## regionIsrael -0.0134974 0.0217435 -0.621
## regionItaly -0.1100307 0.0191750 -5.738
## regionJamaica 0.0014970 0.0431012 0.035
## regionJapan -0.1146719 0.0203523 -5.634
## regionKazakhstan -0.0127775 0.0237928 -0.537
## regionKorea, Republic of -0.0743133 0.0184301 -4.032
## regionKyrgyzstan -0.0048301 0.0361064 -0.134
## regionLatvia 0.0068987 0.0179836 0.384
## regionLithuania 0.0059020 0.0179835 0.328
## regionLuxembourg 0.0065243 0.0200840 0.325
## regionMacedonia, the former Yugoslav Republic of 0.0032305 0.0179838 0.180
## regionMalta 0.0060839 0.0273798 0.222
## regionMexico -0.2176272 0.0200494 -10.855
## regionMoldova, Republic of 0.0017442 0.0184414 0.095
## regionMontenegro 0.0055724 0.0223115 0.250
## regionMorocco -0.0898107 0.0219036 -4.100
## regionMyanmar -0.1142659 0.0594288 -1.923
## regionNetherlands 0.0389019 0.0180364 2.157
## regionNew Zealand 0.0140606 0.0179840 0.782
## regionNorway 0.0074152 0.0179845 0.412
## regionPhilippines -0.1413581 0.0190184 -7.433
## regionPoland -0.0379688 0.0183347 -2.071
## regionPortugal -0.0066746 0.0179975 -0.371
## regionRomania -0.0308956 0.0183634 -1.682
## regionRussian Federation -0.0683371 0.0219550 -3.113
## regionSenegal -0.0225091 0.0223188 -1.009
## regionSerbia 0.0033432 0.0223146 0.150
## regionSlovakia -0.0010943 0.0179847 -0.061
## regionSlovenia 0.0041619 0.0179837 0.231
## regionSpain -0.0378393 0.0183552 -2.062
## regionSudan -0.0660237 0.0224047 -2.947
## regionSweden -0.0040717 0.0179938 -0.226
## regionSwitzerland -0.0066086 0.0179913 -0.367
## regionTaiwan -0.0317401 0.0180554 -1.758
## regionTajikistan -0.0072298 0.0197768 -0.366
## regionThailand 0.0517591 0.0186104 2.781
## regionTogo -0.0068936 0.0217392 -0.317
## regionTunisia -0.0220529 0.0223215 -0.988
## regionTurkey -0.1820032 0.0191532 -9.502
## regionUkraine -0.0628029 0.0220407 -2.849
## regionUnited Kingdom -0.0535103 0.0187736 -2.850
## regionUnited States of America -0.1345121 0.0368658 -3.649
## regionUruguay 0.0081714 0.0361062 0.226
## regionVenezuela -0.0500324 0.0180766 -2.768
## regionViet Nam -0.1312046 0.0280901 -4.671
## regionZimbabwe -0.0144974 0.0258965 -0.560
## Pr(>|t|)
## (Intercept) 0.602984
## PKcal < 2e-16 ***
## regionAlgeria 0.000563 ***
## regionArgentina 0.988064
## regionArmenia 0.943813
## regionAustralia 0.000757 ***
## regionAustria 0.725349
## regionAzerbaijan 0.618552
## regionBelarus 0.612214
## regionBelgium 0.811219
## regionBenin 0.533101
## regionBosnia and Herzegovina 0.948731
## regionBotswana 0.836371
## regionBrazil 2.28e-11 ***
## regionBulgaria 0.854858
## regionCameroon 0.085633 .
## regionCanada 0.007459 **
## regionChina < 2e-16 ***
## regionColombia 0.010000 **
## regionCongo 0.987791
## regionCosta Rica 0.547567
## regionCote dIvoire 0.076658 .
## regionCroatia 0.816174
## regionCyprus 0.774647
## regionCzech Republic 0.993809
## regionDenmark 0.320630
## regionDominican Republic 0.891411
## regionEstonia 0.685343
## regionFinland 0.753580
## regionFrance 0.019640 *
## regionGeorgia 0.843078
## regionGermany 0.000246 ***
## regionGreece 0.443748
## regionHungary 0.895870
## regionIceland 0.608264
## regionIreland 0.496156
## regionIsrael 0.534856
## regionItaly 1.16e-08 ***
## regionJamaica 0.972298
## regionJapan 2.10e-08 ***
## regionKazakhstan 0.591326
## regionKorea, Republic of 5.81e-05 ***
## regionKyrgyzstan 0.893600
## regionLatvia 0.701322
## regionLithuania 0.742814
## regionLuxembourg 0.745339
## regionMacedonia, the former Yugoslav Republic of 0.857466
## regionMalta 0.824185
## regionMexico < 2e-16 ***
## regionMoldova, Republic of 0.924662
## regionMontenegro 0.802810
## regionMorocco 4.35e-05 ***
## regionMyanmar 0.054705 .
## regionNetherlands 0.031178 *
## regionNew Zealand 0.434435
## regionNorway 0.680172
## regionPhilippines 1.79e-13 ***
## regionPoland 0.038544 *
## regionPortugal 0.710793
## regionRomania 0.092693 .
## regionRussian Federation 0.001890 **
## regionSenegal 0.313367
## regionSerbia 0.880927
## regionSlovakia 0.951491
## regionSlovenia 0.817015
## regionSpain 0.039429 *
## regionSudan 0.003260 **
## regionSweden 0.821012
## regionSwitzerland 0.713433
## regionTaiwan 0.078966 .
## regionTajikistan 0.714737
## regionThailand 0.005485 **
## regionTogo 0.751208
## regionTunisia 0.323332
## regionTurkey < 2e-16 ***
## regionUkraine 0.004441 **
## regionUnited Kingdom 0.004428 **
## regionUnited States of America 0.000273 ***
## regionUruguay 0.820987
## regionVenezuela 0.005714 **
## regionViet Nam 3.27e-06 ***
## regionZimbabwe 0.575686
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05766 on 1479 degrees of freedom
## Multiple R-squared: 0.9394, Adjusted R-squared: 0.9361
## F-statistic: 283.2 on 81 and 1479 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ PKcal + GDP, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.55185 -0.01705 -0.01003 -0.00088 0.74076
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.060e-02 2.439e-03 4.349 1.46e-05 ***
## PKcal 7.114e-01 1.342e-02 53.012 < 2e-16 ***
## GDP 4.933e-08 1.287e-09 38.326 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08747 on 1499 degrees of freedom
## (59 observations deleted due to missingness)
## Multiple R-squared: 0.8581, Adjusted R-squared: 0.8579
## F-statistic: 4532 on 2 and 1499 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ PKcal + GDP + region, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37483 -0.00362 -0.00014 0.00340 0.26306
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -2.350e-03 1.078e-02 -0.218
## PKcal 9.769e-01 7.121e-02 13.717
## GDP 8.919e-08 2.751e-09 32.423
## regionAlgeria -3.804e-02 2.016e-02 -1.887
## regionArgentina 1.511e-02 4.581e-02 0.330
## regionArmenia 2.055e-03 3.323e-02 0.062
## regionAustralia 1.788e-02 1.398e-02 1.279
## regionAustria -1.861e-02 1.388e-02 -1.341
## regionAzerbaijan -3.225e-03 1.638e-02 -0.197
## regionBelarus 1.676e-02 1.410e-02 1.189
## regionBelgium -5.641e-03 1.551e-02 -0.364
## regionBenin -6.494e-03 1.720e-02 -0.377
## regionBosnia and Herzegovina -1.654e-03 1.906e-02 -0.087
## regionBotswana 5.382e-04 1.503e-02 0.036
## regionBrazil 3.557e-01 1.908e-02 18.645
## regionBulgaria 2.495e-03 1.387e-02 0.180
## regionCameroon -1.924e-02 1.909e-02 -1.008
## regionCanada -1.163e-01 1.747e-02 -6.654
## regionChina -6.228e-01 9.699e-02 -6.421
## regionColombia -2.667e-03 1.410e-02 -0.189
## regionCongo -1.029e-03 2.470e-02 -0.042
## regionCosta Rica 9.833e-03 1.386e-02 0.709
## regionCote dIvoire -1.394e-02 1.680e-02 -0.830
## regionCroatia 3.167e-03 1.409e-02 0.225
## regionCyprus -1.234e-04 1.397e-02 -0.009
## regionCzech Republic 3.409e-03 1.437e-02 0.237
## regionDenmark 1.428e-03 1.387e-02 0.103
## regionDominican Republic 6.655e-03 1.387e-02 0.480
## regionEstonia 3.515e-03 1.409e-02 0.249
## regionFinland -6.809e-03 1.387e-02 -0.491
## regionFrance -7.975e-02 1.484e-02 -5.373
## regionGeorgia -2.359e-03 1.548e-02 -0.152
## regionGermany -1.641e-01 1.534e-02 -10.695
## regionGreece -1.064e-02 1.388e-02 -0.766
## regionHungary 3.825e-03 1.387e-02 0.276
## regionIceland 3.710e-03 1.387e-02 0.268
## regionIreland 1.448e-03 1.387e-02 0.104
## regionIsrael -2.226e-02 1.676e-02 -1.328
## regionItaly -1.288e-01 1.479e-02 -8.704
## regionJamaica -3.381e-04 3.323e-02 -0.010
## regionJapan -2.699e-01 1.640e-02 -16.461
## regionKazakhstan -2.529e-03 1.834e-02 -0.138
## regionKorea, Republic of -6.956e-02 1.421e-02 -4.895
## regionKyrgyzstan -1.813e-03 2.783e-02 -0.065
## regionLatvia 3.817e-03 1.409e-02 0.271
## regionLithuania 3.460e-03 1.409e-02 0.246
## regionLuxembourg -1.948e-03 1.549e-02 -0.126
## regionMacedonia, the former Yugoslav Republic of 1.242e-03 1.409e-02 0.088
## regionMalta 1.139e-03 2.111e-02 0.054
## regionMexico -1.071e-01 1.584e-02 -6.761
## regionMoldova, Republic of 2.987e-03 1.435e-02 0.208
## regionMontenegro 1.639e-03 1.771e-02 0.092
## regionMorocco -3.927e-02 1.696e-02 -2.316
## regionMyanmar -4.591e-02 4.586e-02 -1.001
## regionNetherlands 1.195e-02 1.393e-02 0.858
## regionNew Zealand 6.262e-03 1.387e-02 0.452
## regionNorway -1.541e-02 1.388e-02 -1.110
## regionPhilippines -2.506e-02 1.510e-02 -1.660
## regionPoland 1.040e-02 1.421e-02 0.732
## regionPortugal -7.444e-03 1.387e-02 -0.537
## regionRomania -3.930e-03 1.418e-02 -0.277
## regionRussian Federation 1.045e-01 1.776e-02 5.883
## regionSenegal -1.016e-02 1.721e-02 -0.590
## regionSerbia 1.054e-02 1.772e-02 0.595
## regionSlovakia -1.435e-03 1.422e-02 -0.101
## regionSlovenia -4.230e-07 1.409e-02 0.000
## regionSpain -4.876e-02 1.415e-02 -3.445
## regionSudan -3.166e-02 2.007e-02 -1.577
## regionSweden -2.469e-02 1.389e-02 -1.778
## regionSwitzerland -4.477e-02 1.392e-02 -3.216
## regionTajikistan -3.645e-03 1.525e-02 -0.239
## regionThailand 1.300e-01 1.455e-02 8.932
## regionTogo -3.526e-03 1.676e-02 -0.210
## regionTunisia -9.943e-03 1.721e-02 -0.578
## regionTurkey -8.618e-02 1.506e-02 -5.721
## regionUkraine 5.628e-03 1.712e-02 0.329
## regionUnited Kingdom -1.496e-01 1.477e-02 -10.129
## regionUnited States of America -7.145e-01 3.353e-02 -21.308
## regionUruguay 4.154e-03 2.783e-02 0.149
## regionVenezuela -3.613e-02 1.394e-02 -2.591
## regionViet Nam -1.220e-02 2.197e-02 -0.555
## regionZimbabwe -6.372e-03 1.997e-02 -0.319
## Pr(>|t|)
## (Intercept) 0.827539
## PKcal < 2e-16 ***
## GDP < 2e-16 ***
## regionAlgeria 0.059429 .
## regionArgentina 0.741631
## regionArmenia 0.950686
## regionAustralia 0.201206
## regionAustria 0.180271
## regionAzerbaijan 0.843975
## regionBelarus 0.234646
## regionBelgium 0.716085
## regionBenin 0.705862
## regionBosnia and Herzegovina 0.930853
## regionBotswana 0.971449
## regionBrazil < 2e-16 ***
## regionBulgaria 0.857248
## regionCameroon 0.313776
## regionCanada 4.06e-11 ***
## regionChina 1.85e-10 ***
## regionColombia 0.850054
## regionCongo 0.966766
## regionCosta Rica 0.478263
## regionCote dIvoire 0.406893
## regionCroatia 0.822212
## regionCyprus 0.992954
## regionCzech Republic 0.812468
## regionDenmark 0.918041
## regionDominican Republic 0.631376
## regionEstonia 0.803026
## regionFinland 0.623561
## regionFrance 9.03e-08 ***
## regionGeorgia 0.878907
## regionGermany < 2e-16 ***
## regionGreece 0.443678
## regionHungary 0.782845
## regionIceland 0.789086
## regionIreland 0.916843
## regionIsrael 0.184449
## regionItaly < 2e-16 ***
## regionJamaica 0.991883
## regionJapan < 2e-16 ***
## regionKazakhstan 0.890357
## regionKorea, Republic of 1.10e-06 ***
## regionKyrgyzstan 0.948087
## regionLatvia 0.786489
## regionLithuania 0.806039
## regionLuxembourg 0.899917
## regionMacedonia, the former Yugoslav Republic of 0.929767
## regionMalta 0.956983
## regionMexico 2.00e-11 ***
## regionMoldova, Republic of 0.835173
## regionMontenegro 0.926316
## regionMorocco 0.020710 *
## regionMyanmar 0.316975
## regionNetherlands 0.391236
## regionNew Zealand 0.651602
## regionNorway 0.267231
## regionPhilippines 0.097194 .
## regionPoland 0.464334
## regionPortugal 0.591687
## regionRomania 0.781724
## regionRussian Federation 5.02e-09 ***
## regionSenegal 0.555037
## regionSerbia 0.552094
## regionSlovakia 0.919602
## regionSlovenia 0.999976
## regionSpain 0.000588 ***
## regionSudan 0.114958
## regionSweden 0.075575 .
## regionSwitzerland 0.001328 **
## regionTajikistan 0.811070
## regionThailand < 2e-16 ***
## regionTogo 0.833412
## regionTunisia 0.563588
## regionTurkey 1.29e-08 ***
## regionUkraine 0.742473
## regionUnited Kingdom < 2e-16 ***
## regionUnited States of America < 2e-16 ***
## regionUruguay 0.881379
## regionVenezuela 0.009663 **
## regionViet Nam 0.578749
## regionZimbabwe 0.749665
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04445 on 1420 degrees of freedom
## (59 observations deleted due to missingness)
## Multiple R-squared: 0.9653, Adjusted R-squared: 0.9633
## F-statistic: 487.5 on 81 and 1420 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ 0 + nonstaples, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.58086 -0.00977 -0.00108 0.00567 0.61242
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## nonstaples 2.60310 0.02297 113.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08127 on 1560 degrees of freedom
## Multiple R-squared: 0.8917, Adjusted R-squared: 0.8916
## F-statistic: 1.284e+04 on 1 and 1560 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ nonstaples, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.57185 -0.01617 -0.00830 -0.00122 0.60981
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.007335 0.002215 3.311 0.000951 ***
## nonstaples 2.572089 0.024741 103.962 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08101 on 1559 degrees of freedom
## Multiple R-squared: 0.8739, Adjusted R-squared: 0.8739
## F-statistic: 1.081e+04 on 1 and 1559 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ 0 + nonstaples + region, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65301 -0.00331 -0.00010 0.00261 0.33872
##
## Coefficients:
## Estimate Std. Error t value
## nonstaples 2.6046215 0.0644683 40.402
## regionAlbania -0.0032397 0.0130532 -0.248
## regionAlgeria -0.0391350 0.0203919 -1.919
## regionArgentina 0.0378308 0.0538561 0.702
## regionArmenia -0.0011714 0.0380544 -0.031
## regionAustralia 0.0688185 0.0106324 6.473
## regionAustria -0.0070429 0.0105774 -0.666
## regionAzerbaijan -0.0001723 0.0149276 -0.012
## regionBelarus 0.0127213 0.0105630 1.204
## regionBelgium 0.0077959 0.0105832 0.737
## regionBenin -0.0063685 0.0162272 -0.392
## regionBosnia and Herzegovina -0.0036305 0.0190275 -0.191
## regionBotswana -0.0013702 0.0126848 -0.108
## regionBrazil 0.3528411 0.0133944 26.342
## regionBulgaria -0.0004413 0.0105596 -0.042
## regionCameroon -0.0229165 0.0190360 -1.204
## regionCanada -0.0346208 0.0163885 -2.113
## regionChina -0.1753702 0.0355725 -4.930
## regionColombia -0.0132803 0.0106881 -1.243
## regionCongo -0.0021041 0.0269085 -0.078
## regionCosta Rica 0.0076812 0.0105560 0.728
## regionCote dIvoire -0.0091306 0.0155409 -0.588
## regionCroatia 0.0017367 0.0105562 0.165
## regionCyprus -0.0012441 0.0107634 -0.116
## regionCzech Republic 0.0027686 0.0114914 0.241
## regionDenmark 0.0154988 0.0105604 1.468
## regionDominican Republic 0.0023519 0.0105600 0.223
## regionEstonia 0.0015060 0.0109855 0.137
## regionFinland 0.0025947 0.0105590 0.246
## regionFrance 0.0054369 0.0115370 0.471
## regionGeorgia -0.0040176 0.0134548 -0.299
## regionGermany -0.0118244 0.0117898 -1.003
## regionGreece -0.0079928 0.0105817 -0.755
## regionHungary -0.0005342 0.0105711 -0.051
## regionIceland 0.0021346 0.0105543 0.202
## regionIreland 0.0096378 0.0105574 0.913
## regionIsrael -0.0124073 0.0155412 -0.798
## regionItaly -0.0428042 0.0113482 -3.772
## regionJamaica -0.0031538 0.0380543 -0.083
## regionJapan 0.0377823 0.0114966 3.286
## regionKazakhstan -0.0022879 0.0179541 -0.127
## regionKorea, Republic of -0.0062954 0.0107193 -0.587
## regionKyrgyzstan -0.0045335 0.0310715 -0.146
## regionLatvia 0.0019760 0.0105550 0.187
## regionLithuania 0.0030136 0.0105554 0.286
## regionLuxembourg -0.0003160 0.0134542 -0.023
## regionMacedonia, the former Yugoslav Republic of -0.0014692 0.0105546 -0.139
## regionMalta -0.0007605 0.0219706 -0.035
## regionMexico -0.0805949 0.0114202 -7.057
## regionMoldova, Republic of 0.0005263 0.0112222 0.047
## regionMontenegro -0.0010187 0.0162264 -0.063
## regionMorocco -0.0332653 0.0155667 -2.137
## regionMyanmar -0.0600785 0.0538439 -1.116
## regionNetherlands 0.0420697 0.0106195 3.962
## regionNew Zealand 0.0100785 0.0105577 0.955
## regionNorway 0.0045596 0.0105582 0.432
## regionPhilippines -0.0103961 0.0107787 -0.965
## regionPoland 0.0006177 0.0107814 0.057
## regionPortugal -0.0021505 0.0105690 -0.203
## regionRomania -0.0033850 0.0110234 -0.307
## regionRussian Federation 0.1215281 0.0124793 9.738
## regionSenegal -0.0107542 0.0162289 -0.663
## regionSerbia 0.0057513 0.0162304 0.354
## regionSlovakia -0.0027358 0.0105578 -0.259
## regionSlovenia -0.0007592 0.0105548 -0.072
## regionSpain -0.0077251 0.0108466 -0.712
## regionSudan -0.0335572 0.0162592 -2.064
## regionSweden -0.0045831 0.0105710 -0.434
## regionSwitzerland -0.0081388 0.0105687 -0.770
## regionTaiwan -0.0173315 0.0106110 -1.633
## regionTajikistan -0.0049136 0.0130531 -0.376
## regionThailand 0.1454610 0.0107231 13.565
## regionTogo -0.0039909 0.0155359 -0.257
## regionTunisia -0.0118327 0.0162315 -0.729
## regionTurkey -0.0625871 0.0109254 -5.729
## regionUkraine -0.0107032 0.0156540 -0.684
## regionUnited Kingdom -0.0024650 0.0111106 -0.222
## regionUnited States of America 0.1265614 0.0239929 5.275
## regionUruguay 0.0045713 0.0310715 0.147
## regionVenezuela -0.0283499 0.0106124 -2.671
## regionViet Nam -0.0039285 0.0220969 -0.178
## regionZimbabwe -0.0078505 0.0203424 -0.386
## Pr(>|t|)
## nonstaples < 2e-16 ***
## regionAlbania 0.804018
## regionAlgeria 0.055158 .
## regionArgentina 0.482513
## regionArmenia 0.975448
## regionAustralia 1.31e-10 ***
## regionAustria 0.505611
## regionAzerbaijan 0.990793
## regionBelarus 0.228654
## regionBelgium 0.461464
## regionBenin 0.694777
## regionBosnia and Herzegovina 0.848707
## regionBotswana 0.913998
## regionBrazil < 2e-16 ***
## regionBulgaria 0.966668
## regionCameroon 0.228839
## regionCanada 0.034810 *
## regionChina 9.15e-07 ***
## regionColombia 0.214236
## regionCongo 0.937685
## regionCosta Rica 0.466939
## regionCote dIvoire 0.556945
## regionCroatia 0.869344
## regionCyprus 0.908000
## regionCzech Republic 0.809648
## regionDenmark 0.142417
## regionDominican Republic 0.823783
## regionEstonia 0.890979
## regionFinland 0.805926
## regionFrance 0.637525
## regionGeorgia 0.765284
## regionGermany 0.316057
## regionGreece 0.450164
## regionHungary 0.959705
## regionIceland 0.839750
## regionIreland 0.361446
## regionIsrael 0.424797
## regionItaly 0.000168 ***
## regionJamaica 0.933961
## regionJapan 0.001039 **
## regionKazakhstan 0.898617
## regionKorea, Republic of 0.557091
## regionKyrgyzstan 0.884016
## regionLatvia 0.851521
## regionLithuania 0.775299
## regionLuxembourg 0.981267
## regionMacedonia, the former Yugoslav Republic of 0.889311
## regionMalta 0.972393
## regionMexico 2.60e-12 ***
## regionMoldova, Republic of 0.962603
## regionMontenegro 0.949950
## regionMorocco 0.032765 *
## regionMyanmar 0.264693
## regionNetherlands 7.80e-05 ***
## regionNew Zealand 0.339933
## regionNorway 0.665913
## regionPhilippines 0.334950
## regionPoland 0.954317
## regionPortugal 0.838795
## regionRomania 0.758828
## regionRussian Federation < 2e-16 ***
## regionSenegal 0.507655
## regionSerbia 0.723124
## regionSlovakia 0.795573
## regionSlovenia 0.942668
## regionSpain 0.476445
## regionSudan 0.039202 *
## regionSweden 0.664679
## regionSwitzerland 0.441376
## regionTaiwan 0.102609
## regionTajikistan 0.706651
## regionThailand < 2e-16 ***
## regionTogo 0.797307
## regionTunisia 0.466121
## regionTurkey 1.23e-08 ***
## regionUkraine 0.494252
## regionUnited Kingdom 0.824454
## regionUnited States of America 1.53e-07 ***
## regionUruguay 0.883055
## regionVenezuela 0.007637 **
## regionViet Nam 0.858916
## regionZimbabwe 0.699613
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05382 on 1479 degrees of freedom
## Multiple R-squared: 0.955, Adjusted R-squared: 0.9525
## F-statistic: 382.5 on 82 and 1479 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ nonstaples + region, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65301 -0.00331 -0.00010 0.00261 0.33872
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -3.240e-03 1.305e-02 -0.248
## nonstaples 2.605e+00 6.447e-02 40.402
## regionAlgeria -3.590e-02 2.420e-02 -1.483
## regionArgentina 4.107e-02 5.541e-02 0.741
## regionArmenia 2.068e-03 4.023e-02 0.051
## regionAustralia 7.206e-02 1.683e-02 4.283
## regionAustria -3.803e-03 1.680e-02 -0.226
## regionAzerbaijan 3.067e-03 1.983e-02 0.155
## regionBelarus 1.596e-02 1.679e-02 0.951
## regionBelgium 1.104e-02 1.680e-02 0.657
## regionBenin -3.129e-03 2.082e-02 -0.150
## regionBosnia and Herzegovina -3.907e-04 2.307e-02 -0.017
## regionBotswana 1.870e-03 1.820e-02 0.103
## regionBrazil 3.561e-01 1.864e-02 19.099
## regionBulgaria 2.798e-03 1.679e-02 0.167
## regionCameroon -1.968e-02 2.308e-02 -0.853
## regionCanada -3.138e-02 2.094e-02 -1.499
## regionChina -1.721e-01 3.777e-02 -4.557
## regionColombia -1.004e-02 1.686e-02 -0.596
## regionCongo 1.136e-03 2.991e-02 0.038
## regionCosta Rica 1.092e-02 1.679e-02 0.651
## regionCote dIvoire -5.891e-03 2.029e-02 -0.290
## regionCroatia 4.976e-03 1.679e-02 0.296
## regionCyprus 1.996e-03 1.692e-02 0.118
## regionCzech Republic 6.008e-03 1.739e-02 0.346
## regionDenmark 1.874e-02 1.679e-02 1.116
## regionDominican Republic 5.592e-03 1.679e-02 0.333
## regionEstonia 4.746e-03 1.706e-02 0.278
## regionFinland 5.834e-03 1.679e-02 0.348
## regionFrance 8.677e-03 1.738e-02 0.499
## regionGeorgia -7.779e-04 1.875e-02 -0.041
## regionGermany -8.585e-03 1.755e-02 -0.489
## regionGreece -4.753e-03 1.680e-02 -0.283
## regionHungary 2.706e-03 1.679e-02 0.161
## regionIceland 5.374e-03 1.679e-02 0.320
## regionIreland 1.288e-02 1.679e-02 0.767
## regionIsrael -9.168e-03 2.029e-02 -0.452
## regionItaly -3.956e-02 1.726e-02 -2.292
## regionJamaica 8.591e-05 4.023e-02 0.002
## regionJapan 4.102e-02 1.736e-02 2.363
## regionKazakhstan 9.518e-04 2.219e-02 0.043
## regionKorea, Republic of -3.056e-03 1.688e-02 -0.181
## regionKyrgyzstan -1.294e-03 3.370e-02 -0.038
## regionLatvia 5.216e-03 1.679e-02 0.311
## regionLithuania 6.253e-03 1.679e-02 0.373
## regionLuxembourg 2.924e-03 1.875e-02 0.156
## regionMacedonia, the former Yugoslav Republic of 1.771e-03 1.679e-02 0.105
## regionMalta 2.479e-03 2.556e-02 0.097
## regionMexico -7.736e-02 1.731e-02 -4.469
## regionMoldova, Republic of 3.766e-03 1.721e-02 0.219
## regionMontenegro 2.221e-03 2.082e-02 0.107
## regionMorocco -3.003e-02 2.031e-02 -1.478
## regionMyanmar -5.684e-02 5.540e-02 -1.026
## regionNetherlands 4.531e-02 1.682e-02 2.694
## regionNew Zealand 1.332e-02 1.679e-02 0.793
## regionNorway 7.799e-03 1.679e-02 0.465
## regionPhilippines -7.156e-03 1.691e-02 -0.423
## regionPoland 3.857e-03 1.691e-02 0.228
## regionPortugal 1.089e-03 1.679e-02 0.065
## regionRomania -1.453e-04 1.708e-02 -0.009
## regionRussian Federation 1.248e-01 1.801e-02 6.926
## regionSenegal -7.514e-03 2.083e-02 -0.361
## regionSerbia 8.991e-03 2.083e-02 0.432
## regionSlovakia 5.039e-04 1.679e-02 0.030
## regionSlovenia 2.481e-03 1.679e-02 0.148
## regionSpain -4.485e-03 1.695e-02 -0.265
## regionSudan -3.032e-02 2.084e-02 -1.455
## regionSweden -1.343e-03 1.679e-02 -0.080
## regionSwitzerland -4.899e-03 1.679e-02 -0.292
## regionTaiwan -1.409e-02 1.681e-02 -0.838
## regionTajikistan -1.674e-03 1.846e-02 -0.091
## regionThailand 1.487e-01 1.688e-02 8.810
## regionTogo -7.511e-04 2.029e-02 -0.037
## regionTunisia -8.593e-03 2.083e-02 -0.413
## regionTurkey -5.935e-02 1.700e-02 -3.491
## regionUkraine -7.463e-03 2.037e-02 -0.366
## regionUnited Kingdom 7.747e-04 1.711e-02 0.045
## regionUnited States of America 1.298e-01 2.721e-02 4.771
## regionUruguay 7.811e-03 3.370e-02 0.232
## regionVenezuela -2.511e-02 1.681e-02 -1.493
## regionViet Nam -6.888e-04 2.565e-02 -0.027
## regionZimbabwe -4.611e-03 2.417e-02 -0.191
## Pr(>|t|)
## (Intercept) 0.804018
## nonstaples < 2e-16 ***
## regionAlgeria 0.138277
## regionArgentina 0.458686
## regionArmenia 0.959004
## regionAustralia 1.96e-05 ***
## regionAustria 0.820886
## regionAzerbaijan 0.877079
## regionBelarus 0.341899
## regionBelgium 0.511312
## regionBenin 0.880593
## regionBosnia and Herzegovina 0.986491
## regionBotswana 0.918201
## regionBrazil < 2e-16 ***
## regionBulgaria 0.867628
## regionCameroon 0.394009
## regionCanada 0.134127
## regionChina 5.61e-06 ***
## regionColombia 0.551517
## regionCongo 0.969714
## regionCosta Rica 0.515405
## regionCote dIvoire 0.771633
## regionCroatia 0.766916
## regionCyprus 0.906115
## regionCzech Republic 0.729704
## regionDenmark 0.264502
## regionDominican Republic 0.739111
## regionEstonia 0.780916
## regionFinland 0.728220
## regionFrance 0.617790
## regionGeorgia 0.966903
## regionGermany 0.624788
## regionGreece 0.777243
## regionHungary 0.872021
## regionIceland 0.748888
## regionIreland 0.443114
## regionIsrael 0.651507
## regionItaly 0.022061 *
## regionJamaica 0.998296
## regionJapan 0.018248 *
## regionKazakhstan 0.965796
## regionKorea, Republic of 0.856335
## regionKyrgyzstan 0.969382
## regionLatvia 0.756054
## regionLithuania 0.709546
## regionLuxembourg 0.876076
## regionMacedonia, the former Yugoslav Republic of 0.916012
## regionMalta 0.922728
## regionMexico 8.46e-06 ***
## regionMoldova, Republic of 0.826846
## regionMontenegro 0.915078
## regionMorocco 0.139497
## regionMyanmar 0.305068
## regionNetherlands 0.007137 **
## regionNew Zealand 0.427672
## regionNorway 0.642272
## regionPhilippines 0.672224
## regionPoland 0.819613
## regionPortugal 0.948286
## regionRomania 0.993213
## regionRussian Federation 6.44e-12 ***
## regionSenegal 0.718274
## regionSerbia 0.666003
## regionSlovakia 0.976055
## regionSlovenia 0.882540
## regionSpain 0.791358
## regionSudan 0.146019
## regionSweden 0.936249
## regionSwitzerland 0.770506
## regionTaiwan 0.402091
## regionTajikistan 0.927759
## regionThailand < 2e-16 ***
## regionTogo 0.970476
## regionTunisia 0.679960
## regionTurkey 0.000495 ***
## regionUkraine 0.714118
## regionUnited Kingdom 0.963899
## regionUnited States of America 2.02e-06 ***
## regionUruguay 0.816746
## regionVenezuela 0.135544
## regionViet Nam 0.978582
## regionZimbabwe 0.848730
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05382 on 1479 degrees of freedom
## Multiple R-squared: 0.9472, Adjusted R-squared: 0.9443
## F-statistic: 327.7 on 81 and 1479 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ nonstaples + GDP, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.62000 -0.01527 -0.00614 -0.00004 0.63106
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.123e-03 2.177e-03 2.353 0.0188 *
## nonstaples 2.212e+00 3.512e-02 62.976 <2e-16 ***
## GDP 1.979e-08 1.426e-09 13.882 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07767 on 1499 degrees of freedom
## (59 observations deleted due to missingness)
## Multiple R-squared: 0.8881, Adjusted R-squared: 0.8879
## F-statistic: 5948 on 2 and 1499 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ nonstaples + GDP + region, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.39874 -0.00356 -0.00008 0.00318 0.25814
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -8.247e-04 1.072e-02 -0.077
## nonstaples 1.095e+00 7.642e-02 14.334
## GDP 8.263e-08 3.004e-09 27.510
## regionAlgeria -1.692e-02 1.990e-02 -0.850
## regionArgentina 3.332e-02 4.552e-02 0.732
## regionArmenia 1.691e-03 3.305e-02 0.051
## regionAustralia 2.687e-02 1.392e-02 1.931
## regionAustria -1.614e-02 1.380e-02 -1.169
## regionAzerbaijan 2.312e-03 1.629e-02 0.142
## regionBelarus 1.943e-02 1.402e-02 1.386
## regionBelgium -7.352e-04 1.542e-02 -0.048
## regionBenin -2.266e-03 1.711e-02 -0.132
## regionBosnia and Herzegovina -1.128e-03 1.896e-02 -0.060
## regionBotswana -2.227e-04 1.495e-02 -0.015
## regionBrazil 4.351e-01 1.559e-02 27.911
## regionBulgaria 4.914e-03 1.379e-02 0.356
## regionCameroon -1.051e-02 1.896e-02 -0.554
## regionCanada -9.761e-02 1.737e-02 -5.620
## regionChina 2.514e-01 3.469e-02 7.245
## regionColombia 1.160e-02 1.387e-02 0.836
## regionCongo -3.136e-04 2.457e-02 -0.013
## regionCosta Rica 9.996e-03 1.379e-02 0.725
## regionCote dIvoire -1.129e-03 1.667e-02 -0.068
## regionCroatia 3.558e-03 1.402e-02 0.254
## regionCyprus -1.211e-03 1.390e-02 -0.087
## regionCzech Republic 6.266e-03 1.428e-02 0.439
## regionDenmark 3.201e-03 1.380e-02 0.232
## regionDominican Republic 7.898e-03 1.379e-02 0.573
## regionEstonia 2.577e-03 1.402e-02 0.184
## regionFinland -5.665e-03 1.380e-02 -0.411
## regionFrance -4.994e-02 1.444e-02 -3.459
## regionGeorgia -1.076e-03 1.540e-02 -0.070
## regionGermany -1.253e-01 1.502e-02 -8.338
## regionGreece -6.379e-03 1.380e-02 -0.462
## regionHungary 5.974e-03 1.380e-02 0.433
## regionIceland 2.327e-03 1.379e-02 0.169
## regionIreland 2.619e-03 1.380e-02 0.190
## regionIsrael -1.948e-02 1.668e-02 -1.168
## regionItaly -9.307e-02 1.431e-02 -6.502
## regionJamaica -8.432e-04 3.305e-02 -0.026
## regionJapan -1.863e-01 1.648e-02 -11.309
## regionKazakhstan 3.255e-03 1.823e-02 0.179
## regionKorea, Republic of -3.710e-02 1.392e-02 -2.666
## regionKyrgyzstan -4.542e-04 2.769e-02 -0.016
## regionLatvia 3.229e-03 1.402e-02 0.230
## regionLithuania 3.723e-03 1.402e-02 0.266
## regionLuxembourg -2.996e-03 1.540e-02 -0.195
## regionMacedonia, the former Yugoslav Republic of 6.465e-04 1.402e-02 0.046
## regionMalta -1.752e-04 2.099e-02 -0.008
## regionMexico -4.998e-02 1.426e-02 -3.505
## regionMoldova, Republic of 3.817e-03 1.428e-02 0.267
## regionMontenegro 3.819e-04 1.762e-02 0.022
## regionMorocco -1.596e-02 1.669e-02 -0.956
## regionMyanmar -2.436e-02 4.553e-02 -0.535
## regionNetherlands 1.759e-02 1.385e-02 1.270
## regionNew Zealand 6.617e-03 1.379e-02 0.480
## regionNorway -1.344e-02 1.381e-02 -0.973
## regionPhilippines 2.718e-02 1.395e-02 1.948
## regionPoland 2.695e-02 1.392e-02 1.936
## regionPortugal -3.624e-03 1.380e-02 -0.263
## regionRomania 8.269e-03 1.403e-02 0.589
## regionRussian Federation 1.818e-01 1.502e-02 12.107
## regionSenegal -4.335e-03 1.711e-02 -0.253
## regionSerbia 1.288e-02 1.762e-02 0.731
## regionSlovakia -6.163e-04 1.414e-02 -0.044
## regionSlovenia -4.896e-04 1.402e-02 -0.035
## regionSpain -3.135e-02 1.396e-02 -2.245
## regionSudan -1.681e-02 1.988e-02 -0.846
## regionSweden -2.160e-02 1.381e-02 -1.564
## regionSwitzerland -4.087e-02 1.386e-02 -2.950
## regionTajikistan -1.464e-03 1.516e-02 -0.097
## regionThailand 1.684e-01 1.388e-02 12.127
## regionTogo -1.094e-03 1.667e-02 -0.066
## regionTunisia -4.680e-03 1.711e-02 -0.274
## regionTurkey -3.701e-02 1.399e-02 -2.645
## regionUkraine 2.643e-02 1.678e-02 1.575
## regionUnited Kingdom -1.159e-01 1.468e-02 -7.895
## regionUnited States of America -5.381e-01 3.296e-02 -16.328
## regionUruguay 4.303e-03 2.769e-02 0.155
## regionVenezuela -2.538e-02 1.381e-02 -1.837
## regionViet Nam 3.836e-02 2.112e-02 1.816
## regionZimbabwe -2.516e-03 1.986e-02 -0.127
## Pr(>|t|)
## (Intercept) 0.938712
## nonstaples < 2e-16 ***
## GDP < 2e-16 ***
## regionAlgeria 0.395249
## regionArgentina 0.464272
## regionArmenia 0.959193
## regionAustralia 0.053713 .
## regionAustria 0.242601
## regionAzerbaijan 0.887164
## regionBelarus 0.166001
## regionBelgium 0.961987
## regionBenin 0.894628
## regionBosnia and Herzegovina 0.952559
## regionBotswana 0.988121
## regionBrazil < 2e-16 ***
## regionBulgaria 0.721646
## regionCameroon 0.579644
## regionCanada 2.29e-08 ***
## regionChina 7.08e-13 ***
## regionColombia 0.403182
## regionCongo 0.989817
## regionCosta Rica 0.468642
## regionCote dIvoire 0.946033
## regionCroatia 0.799647
## regionCyprus 0.930577
## regionCzech Republic 0.660949
## regionDenmark 0.816636
## regionDominican Republic 0.566947
## regionEstonia 0.854160
## regionFinland 0.681431
## regionFrance 0.000558 ***
## regionGeorgia 0.944314
## regionGermany < 2e-16 ***
## regionGreece 0.643980
## regionHungary 0.665032
## regionIceland 0.866042
## regionIreland 0.849480
## regionIsrael 0.242935
## regionItaly 1.09e-10 ***
## regionJamaica 0.979650
## regionJapan < 2e-16 ***
## regionKazakhstan 0.858313
## regionKorea, Republic of 0.007767 **
## regionKyrgyzstan 0.986913
## regionLatvia 0.817829
## regionLithuania 0.790571
## regionLuxembourg 0.845797
## regionMacedonia, the former Yugoslav Republic of 0.963212
## regionMalta 0.993341
## regionMexico 0.000470 ***
## regionMoldova, Republic of 0.789221
## regionMontenegro 0.982711
## regionMorocco 0.339190
## regionMyanmar 0.592731
## regionNetherlands 0.204300
## regionNew Zealand 0.631455
## regionNorway 0.330750
## regionPhilippines 0.051553 .
## regionPoland 0.053046 .
## regionPortugal 0.792807
## regionRomania 0.555789
## regionRussian Federation < 2e-16 ***
## regionSenegal 0.799993
## regionSerbia 0.464875
## regionSlovakia 0.965246
## regionSlovenia 0.972137
## regionSpain 0.024897 *
## regionSudan 0.397877
## regionSweden 0.118084
## regionSwitzerland 0.003231 **
## regionTajikistan 0.923077
## regionThailand < 2e-16 ***
## regionTogo 0.947690
## regionTunisia 0.784506
## regionTurkey 0.008251 **
## regionUkraine 0.115475
## regionUnited Kingdom 5.76e-15 ***
## regionUnited States of America < 2e-16 ***
## regionUruguay 0.876522
## regionVenezuela 0.066357 .
## regionViet Nam 0.069569 .
## regionZimbabwe 0.899169
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04421 on 1420 degrees of freedom
## (59 observations deleted due to missingness)
## Multiple R-squared: 0.9657, Adjusted R-squared: 0.9637
## F-statistic: 492.9 on 81 and 1420 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ 0 + nonstaples + staples, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65301 -0.01313 -0.00181 0.00369 0.56450
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## nonstaples 3.18511 0.04297 74.13 <2e-16 ***
## staples -0.48420 0.03101 -15.61 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0756 on 1559 degrees of freedom
## Multiple R-squared: 0.9063, Adjusted R-squared: 0.9062
## F-statistic: 7541 on 2 and 1559 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ nonstaples + staples, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65054 -0.01555 -0.00434 0.00116 0.56414
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.002627 0.002090 1.257 0.209
## nonstaples 3.167009 0.045310 69.897 <2e-16 ***
## staples -0.478379 0.031351 -15.259 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07559 on 1558 degrees of freedom
## Multiple R-squared: 0.8903, Adjusted R-squared: 0.8902
## F-statistic: 6324 on 2 and 1558 DF, p-value: < 2.2e-16
Using the same criteria to select data as established earlier.
# aggregate the food and energy data, dropping NA values
comb_data_all_cats_GCAM_reg <- comb_data_all_cats %>%
group_by(GCAM_region_ID, year) %>%
summarize(EJ = sum(EJ),
PKcal_w_na = sum(PKcal, na.rm = FALSE),
PKcal = sum(PKcal, na.rm = TRUE),
Food_Kt = sum(Food_Kt, na.rm = TRUE),
Food_Kt_w_calorie = sum(Food_Kt_w_calorie, na.rm = TRUE),
GDP_total_thous2015USD_FAO = sum(GDP_pcap_thous2015USD_FAO * pop, na.rm = TRUE),
pop = sum(pop, na.rm = TRUE),
Feed_Kt = sum(Feed_Kt, na.rm = TRUE),
nonstaples_animal = sum(nonstaples_animal, na.rm = TRUE),
nonstaples_other = sum(nonstaples_other, na.rm = TRUE),
staples = sum(staples, na.rm = TRUE)) %>%
mutate(GDP_mil2015USD_FAO = GDP_total_thous2015USD_FAO / 1000,
GDP_pcap_thous2015USD_FAO = GDP_total_thous2015USD_FAO / pop,
nonstaples = nonstaples_animal + nonstaples_other,
staples_frac = staples / PKcal,
nonstaples_frac = nonstaples / PKcal,
EJ_per_PKcal = EJ / PKcal) %>%
ungroup() %>%
left_join(GCAM_region_names)
comb_data_sel_cats_GCAM_reg <- comb_data_all_cats_GCAM_reg %>%
group_by(region) %>%
filter(!all(EJ == 0) & !all(is.na(PKcal) | PKcal == 0)) %>%
ungroup()
comb_data_sel_cats_GCAM_reg_final <- comb_data_sel_cats_GCAM_reg %>%
right_join(GCAM_reg_years_incl) %>%
filter(!is.na(PKcal),
PKcal > 0)
# filter to the data to infill
comb_data_all_cats_GCAM_reg_to_infill <- comb_data_all_cats_GCAM_reg %>%
anti_join(GCAM_reg_years_incl)
# calculate relationships
# just going to infill from the minimum year onwards for now
hist_and_infilled_test_methods_region <- run_models_calc_infill(comb_data_sel_cats_GCAM_reg_final %>%
rename(GDP = GDP_total_thous2015USD_FAO),
comb_data_all_cats_GCAM_reg_to_infill %>% filter(year >= min_year) %>%
rename(GDP = GDP_total_thous2015USD_FAO)) %>%
rename(GDP_total_thous2015USD_FAO = GDP)
##
## Call:
## lm(formula = EJ ~ 0 + PKcal, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.85107 -0.01511 0.01045 0.11280 0.77794
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 1.09445 0.02872 38.11 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2537 on 435 degrees of freedom
## Multiple R-squared: 0.7695, Adjusted R-squared: 0.769
## F-statistic: 1452 on 1 and 435 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ PKcal, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.74835 -0.10406 -0.08288 0.03422 0.73279
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.10176 0.01393 7.305 1.34e-12 ***
## PKcal 0.95816 0.03293 29.097 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2397 on 434 degrees of freedom
## Multiple R-squared: 0.6611, Adjusted R-squared: 0.6603
## F-statistic: 846.6 on 1 and 434 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ 0 + PKcal + region, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64795 -0.01905 0.00024 0.02269 0.35150
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 2.312624 0.128753 17.962 < 2e-16 ***
## regionArgentina 0.001514 0.112003 0.014 0.989224
## regionAustralia_NZ 0.065496 0.022334 2.933 0.003547 **
## regionBrazil 0.183721 0.035537 5.170 3.65e-07 ***
## regionCanada -0.059897 0.034347 -1.744 0.081919 .
## regionCentral Asia -0.175384 0.080020 -2.192 0.028952 *
## regionChina -2.196125 0.190183 -11.547 < 2e-16 ***
## regionColombia -0.047468 0.022654 -2.095 0.036749 *
## regionEU-12 -0.109201 0.028496 -3.832 0.000147 ***
## regionEU-15 -0.286060 0.076445 -3.742 0.000208 ***
## regionEurope_Eastern -0.063382 0.033541 -1.890 0.059494 .
## regionEurope_Non_EU -0.215491 0.027231 -7.913 2.30e-14 ***
## regionEuropean Free Trade Association -0.009011 0.022052 -0.409 0.683038
## regionJapan -0.100616 0.028342 -3.550 0.000429 ***
## regionMexico -0.205020 0.027586 -7.432 6.16e-13 ***
## regionRussia -0.048545 0.032250 -1.505 0.133013
## regionSouth America_Northern -0.056373 0.022723 -2.481 0.013502 *
## regionSouth Korea -0.072224 0.023301 -3.100 0.002070 **
## regionSoutheast Asia -0.534924 0.053101 -10.074 < 2e-16 ***
## regionTaiwan -0.034926 0.022195 -1.574 0.116350
## regionUSA -0.071141 0.063386 -1.122 0.262365
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1118 on 415 degrees of freedom
## Multiple R-squared: 0.9573, Adjusted R-squared: 0.9551
## F-statistic: 443 on 21 and 415 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ PKcal + region, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64795 -0.01905 0.00024 0.02269 0.35150
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.001514 0.112003 0.014 0.9892
## PKcal 2.312624 0.128753 17.962 < 2e-16 ***
## regionAustralia_NZ 0.063982 0.113960 0.561 0.5748
## regionBrazil 0.182207 0.115912 1.572 0.1167
## regionCanada -0.061410 0.116777 -0.526 0.5993
## regionCentral Asia -0.176897 0.137052 -1.291 0.1975
## regionChina -2.197639 0.214945 -10.224 < 2e-16 ***
## regionColombia -0.048981 0.113939 -0.430 0.6675
## regionEU-12 -0.110715 0.114519 -0.967 0.3342
## regionEU-15 -0.287574 0.131964 -2.179 0.0299 *
## regionEurope_Eastern -0.064896 0.116397 -0.558 0.5775
## regionEurope_Non_EU -0.217005 0.114330 -1.898 0.0584 .
## regionEuropean Free Trade Association -0.010524 0.114016 -0.092 0.9265
## regionJapan -0.102129 0.114495 -0.892 0.3729
## regionMexico -0.206533 0.114381 -1.806 0.0717 .
## regionRussia -0.050059 0.115246 -0.434 0.6642
## regionSouth America_Northern -0.057887 0.114049 -0.508 0.6120
## regionSouth Korea -0.073738 0.113942 -0.647 0.5179
## regionSoutheast Asia -0.536438 0.121366 -4.420 1.26e-05 ***
## regionTaiwan -0.036440 0.113980 -0.320 0.7494
## regionUSA -0.072654 0.125584 -0.579 0.5632
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1118 on 415 degrees of freedom
## Multiple R-squared: 0.9295, Adjusted R-squared: 0.9261
## F-statistic: 273.5 on 20 and 415 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ PKcal + GDP, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.55125 -0.07106 -0.03461 0.04629 0.72168
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.124e-02 9.489e-03 4.347 1.73e-05 ***
## PKcal 6.723e-01 2.470e-02 27.223 < 2e-16 ***
## GDP 4.806e-11 2.006e-12 23.955 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1574 on 433 degrees of freedom
## Multiple R-squared: 0.8543, Adjusted R-squared: 0.8536
## F-statistic: 1269 on 2 and 433 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ PKcal + GDP + region, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.42314 -0.02126 -0.00011 0.02144 0.26629
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.226e-02 8.828e-02 0.139 0.889650
## PKcal 1.182e+00 1.238e-01 9.551 < 2e-16
## GDP 7.356e-11 4.616e-12 15.938 < 2e-16
## regionAustralia_NZ 1.619e-02 8.987e-02 0.180 0.857120
## regionBrazil 3.176e-01 9.175e-02 3.461 0.000594
## regionCanada -1.191e-01 9.211e-02 -1.293 0.196614
## regionCentral Asia -1.090e-01 1.081e-01 -1.008 0.314051
## regionChina -8.965e-01 1.881e-01 -4.767 2.59e-06
## regionColombia -2.345e-02 8.982e-02 -0.261 0.794154
## regionEU-12 -2.705e-02 9.041e-02 -0.299 0.764950
## regionEU-15 -5.890e-01 1.057e-01 -5.571 4.56e-08
## regionEurope_Eastern -7.468e-03 9.181e-02 -0.081 0.935216
## regionEurope_Non_EU -1.289e-01 9.028e-02 -1.428 0.154025
## regionEuropean Free Trade Association -6.571e-02 8.993e-02 -0.731 0.465426
## regionJapan -2.505e-01 9.072e-02 -2.761 0.006018
## regionMexico -1.347e-01 9.027e-02 -1.493 0.136308
## regionRussia 6.721e-02 9.113e-02 0.737 0.461268
## regionSouth America_Northern -5.450e-02 8.989e-02 -0.606 0.544632
## regionSouth Korea -8.248e-02 8.981e-02 -0.918 0.358962
## regionSoutheast Asia -1.992e-01 9.797e-02 -2.033 0.042672
## regionTaiwan -1.696e-02 8.985e-02 -0.189 0.850389
## regionUSA -6.029e-01 1.044e-01 -5.774 1.52e-08
##
## (Intercept)
## PKcal ***
## GDP ***
## regionAustralia_NZ
## regionBrazil ***
## regionCanada
## regionCentral Asia
## regionChina ***
## regionColombia
## regionEU-12
## regionEU-15 ***
## regionEurope_Eastern
## regionEurope_Non_EU
## regionEuropean Free Trade Association
## regionJapan **
## regionMexico
## regionRussia
## regionSouth America_Northern
## regionSouth Korea
## regionSoutheast Asia *
## regionTaiwan
## regionUSA ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08812 on 414 degrees of freedom
## Multiple R-squared: 0.9563, Adjusted R-squared: 0.9541
## F-statistic: 431.4 on 21 and 414 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ 0 + nonstaples, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.58454 -0.03084 -0.00833 0.05096 0.61644
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## nonstaples 2.57670 0.03693 69.78 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1513 on 435 degrees of freedom
## Multiple R-squared: 0.918, Adjusted R-squared: 0.9178
## F-statistic: 4869 on 1 and 435 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ nonstaples, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.56417 -0.05481 -0.03459 0.03206 0.60096
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.029797 0.009128 3.264 0.00118 **
## nonstaples 2.482717 0.046507 53.383 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1497 on 434 degrees of freedom
## Multiple R-squared: 0.8678, Adjusted R-squared: 0.8675
## F-statistic: 2850 on 1 and 434 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ 0 + nonstaples + region, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65396 -0.01592 -0.00002 0.01969 0.33582
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## nonstaples 2.5314864 0.1189007 21.291 < 2e-16
## regionArgentina 0.0401654 0.1031191 0.390 0.697102
## regionAustralia_NZ 0.0806606 0.0204120 3.952 9.12e-05
## regionBrazil 0.3621971 0.0252941 14.319 < 2e-16
## regionCanada -0.0320123 0.0313586 -1.021 0.307922
## regionCentral Asia -0.0748663 0.0730905 -1.024 0.306292
## regionChina -0.1532522 0.0665663 -2.302 0.021815
## regionColombia -0.0113681 0.0204473 -0.556 0.578530
## regionEU-12 0.0029355 0.0225874 0.130 0.896659
## regionEU-15 0.0247097 0.0513317 0.481 0.630504
## regionEurope_Eastern 0.0034103 0.0300955 0.113 0.909836
## regionEurope_Non_EU -0.0742120 0.0213833 -3.471 0.000574
## regionEuropean Free Trade Association -0.0004722 0.0202714 -0.023 0.981427
## regionJapan 0.0429533 0.0218885 1.962 0.050387
## regionMexico -0.0756466 0.0217520 -3.478 0.000559
## regionRussia 0.1282448 0.0237003 5.411 1.06e-07
## regionSouth America_Northern -0.0288557 0.0207183 -1.393 0.164437
## regionSouth Korea -0.0041704 0.0205028 -0.203 0.838916
## regionSoutheast Asia -0.0038444 0.0263813 -0.146 0.884209
## regionTaiwan -0.0160890 0.0203103 -0.792 0.428722
## regionUSA 0.1510049 0.0445830 3.387 0.000774
##
## nonstaples ***
## regionArgentina
## regionAustralia_NZ ***
## regionBrazil ***
## regionCanada
## regionCentral Asia
## regionChina *
## regionColombia
## regionEU-12
## regionEU-15
## regionEurope_Eastern
## regionEurope_Non_EU ***
## regionEuropean Free Trade Association
## regionJapan .
## regionMexico ***
## regionRussia ***
## regionSouth America_Northern
## regionSouth Korea
## regionSoutheast Asia
## regionTaiwan
## regionUSA ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.103 on 415 degrees of freedom
## Multiple R-squared: 0.9637, Adjusted R-squared: 0.9619
## F-statistic: 525 on 21 and 415 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ nonstaples + region, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65396 -0.01592 -0.00002 0.01969 0.33582
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.040165 0.103119 0.390 0.69710
## nonstaples 2.531486 0.118901 21.291 < 2e-16 ***
## regionAustralia_NZ 0.040495 0.105016 0.386 0.69998
## regionBrazil 0.322032 0.105631 3.049 0.00245 **
## regionCanada -0.072178 0.107632 -0.671 0.50285
## regionCentral Asia -0.115032 0.126224 -0.911 0.36265
## regionChina -0.193418 0.120761 -1.602 0.10999
## regionColombia -0.051534 0.105015 -0.491 0.62388
## regionEU-12 -0.037230 0.105201 -0.354 0.72360
## regionEU-15 -0.015456 0.113624 -0.136 0.89187
## regionEurope_Eastern -0.036755 0.107260 -0.343 0.73202
## regionEurope_Non_EU -0.114377 0.105061 -1.089 0.27693
## regionEuropean Free Trade Association -0.040638 0.105036 -0.387 0.69903
## regionJapan 0.002788 0.105113 0.027 0.97885
## regionMexico -0.115812 0.105098 -1.102 0.27113
## regionRussia 0.088079 0.105415 0.836 0.40389
## regionSouth America_Northern -0.069021 0.105103 -0.657 0.51174
## regionSouth Korea -0.044336 0.105013 -0.422 0.67310
## regionSoutheast Asia -0.044010 0.105893 -0.416 0.67791
## regionTaiwan -0.056254 0.105027 -0.536 0.59251
## regionUSA 0.110839 0.110993 0.999 0.31856
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.103 on 415 degrees of freedom
## Multiple R-squared: 0.9401, Adjusted R-squared: 0.9372
## F-statistic: 325.7 on 20 and 415 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ nonstaples + GDP, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.59170 -0.05228 -0.03204 0.02548 0.62888
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.652e-02 8.548e-03 3.102 0.00205 **
## nonstaples 2.101e+00 6.483e-02 32.408 < 2e-16 ***
## GDP 1.849e-11 2.329e-12 7.940 1.75e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.14 on 433 degrees of freedom
## Multiple R-squared: 0.8846, Adjusted R-squared: 0.8841
## F-statistic: 1660 on 2 and 433 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ nonstaples + GDP + region, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.45238 -0.01651 -0.00009 0.01962 0.25855
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.383e-02 8.588e-02 0.394 0.6938
## nonstaples 1.407e+00 1.291e-01 10.894 < 2e-16
## GDP 6.523e-11 4.804e-12 13.577 < 2e-16
## regionAustralia_NZ 8.079e-03 8.749e-02 0.092 0.9265
## regionBrazil 3.841e-01 8.809e-02 4.361 1.64e-05
## regionCanada -1.186e-01 8.970e-02 -1.322 0.1869
## regionCentral Asia -8.116e-02 1.052e-01 -0.772 0.4406
## regionChina 1.058e-01 1.030e-01 1.028 0.3048
## regionColombia -2.795e-02 8.748e-02 -0.320 0.7495
## regionEU-12 6.588e-03 8.767e-02 0.075 0.9401
## regionEU-15 -3.904e-01 9.858e-02 -3.961 8.80e-05
## regionEurope_Eastern 2.151e-03 8.937e-02 0.024 0.9808
## regionEurope_Non_EU -8.001e-02 8.753e-02 -0.914 0.3612
## regionEuropean Free Trade Association -7.704e-02 8.752e-02 -0.880 0.3792
## regionJapan -1.731e-01 8.849e-02 -1.957 0.0511
## regionMexico -9.046e-02 8.755e-02 -1.033 0.3021
## regionRussia 1.339e-01 8.786e-02 1.524 0.1283
## regionSouth America_Northern -6.159e-02 8.753e-02 -0.704 0.4821
## regionSouth Korea -6.491e-02 8.747e-02 -0.742 0.4585
## regionSoutheast Asia 4.433e-02 8.843e-02 0.501 0.6164
## regionTaiwan -3.081e-02 8.749e-02 -0.352 0.7249
## regionUSA -4.304e-01 1.007e-01 -4.275 2.37e-05
##
## (Intercept)
## nonstaples ***
## GDP ***
## regionAustralia_NZ
## regionBrazil ***
## regionCanada
## regionCentral Asia
## regionChina
## regionColombia
## regionEU-12
## regionEU-15 ***
## regionEurope_Eastern
## regionEurope_Non_EU
## regionEuropean Free Trade Association
## regionJapan .
## regionMexico
## regionRussia
## regionSouth America_Northern
## regionSouth Korea
## regionSoutheast Asia
## regionTaiwan
## regionUSA ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08582 on 414 degrees of freedom
## Multiple R-squared: 0.9586, Adjusted R-squared: 0.9565
## F-statistic: 456 on 21 and 414 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ 0 + nonstaples + staples, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.60034 -0.03229 -0.00884 0.05340 0.58710
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## nonstaples 2.98542 0.06334 47.137 < 2e-16 ***
## staples -0.39135 0.05075 -7.711 8.6e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1421 on 434 degrees of freedom
## Multiple R-squared: 0.9279, Adjusted R-squared: 0.9275
## F-statistic: 2791 on 2 and 434 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = EJ ~ nonstaples + staples, data = used_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.59751 -0.05314 -0.02936 0.03656 0.57528
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.024456 0.008625 2.836 0.00479 **
## nonstaples 2.895952 0.070305 41.191 < 2e-16 ***
## staples -0.379541 0.050515 -7.513 3.33e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.141 on 433 degrees of freedom
## Multiple R-squared: 0.8831, Adjusted R-squared: 0.8825
## F-statistic: 1635 on 2 and 433 DF, p-value: < 2.2e-16
The overall coefficients are relatively similar when calculated from the processed GCAM input data and when calculated from the FAO data more directly, either on a country level or a GCAM region level, (generally <20% difference, except for the non-staples/staples separated model, which has larger differences between the coefficients), which is good validation. The intercepts do vary a bit more. The coefficients are also more different for GDP, but the units (and calibration/values) for GDP are different.